Commit
·
4b1ed8b
1
Parent(s):
20be358
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
from transformers import pipeline
|
| 3 |
import textwrap
|
|
|
|
| 4 |
|
| 5 |
st.title('Hugging Face BERT Summarizer')
|
| 6 |
|
|
@@ -12,18 +13,27 @@ model = st.sidebar.selectbox("Choose a model", models)
|
|
| 12 |
|
| 13 |
uploaded_file = st.file_uploader("Choose a .txt file", type="txt")
|
| 14 |
|
|
|
|
|
|
|
|
|
|
| 15 |
# Add slider to the sidebar for the scale value
|
| 16 |
scale_percentage = st.sidebar.slider('Scale %', min_value=1, max_value=100, value=50)
|
| 17 |
|
| 18 |
-
if uploaded_file is not None:
|
| 19 |
user_input = uploaded_file.read().decode('utf-8')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
if st.button('Summarize'):
|
| 22 |
summarizer = pipeline('summarization', model=model)
|
| 23 |
summarized_text = ""
|
| 24 |
|
| 25 |
-
# Split the text into chunks of approximately 500 words each
|
| 26 |
-
chunks = textwrap.wrap(
|
| 27 |
|
| 28 |
# Summarize each chunk
|
| 29 |
for chunk in chunks:
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
from transformers import pipeline
|
| 3 |
import textwrap
|
| 4 |
+
import re
|
| 5 |
|
| 6 |
st.title('Hugging Face BERT Summarizer')
|
| 7 |
|
|
|
|
| 13 |
|
| 14 |
uploaded_file = st.file_uploader("Choose a .txt file", type="txt")
|
| 15 |
|
| 16 |
+
# Add text input for keywords
|
| 17 |
+
keywords = st.text_input("Enter keywords (comma-separated)")
|
| 18 |
+
|
| 19 |
# Add slider to the sidebar for the scale value
|
| 20 |
scale_percentage = st.sidebar.slider('Scale %', min_value=1, max_value=100, value=50)
|
| 21 |
|
| 22 |
+
if uploaded_file is not None and keywords:
|
| 23 |
user_input = uploaded_file.read().decode('utf-8')
|
| 24 |
+
keywords = [keyword.strip() for keyword in keywords.split(",")]
|
| 25 |
+
|
| 26 |
+
# Filter sentences based on keywords
|
| 27 |
+
sentences = re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', user_input)
|
| 28 |
+
filtered_sentences = [sentence for sentence in sentences if any(keyword.lower() in sentence.lower() for keyword in keywords)]
|
| 29 |
+
filtered_text = ' '.join(filtered_sentences)
|
| 30 |
|
| 31 |
if st.button('Summarize'):
|
| 32 |
summarizer = pipeline('summarization', model=model)
|
| 33 |
summarized_text = ""
|
| 34 |
|
| 35 |
+
# Split the filtered text into chunks of at most 500 characters each
|
| 36 |
+
chunks = textwrap.wrap(filtered_text, 500)
|
| 37 |
|
| 38 |
# Summarize each chunk
|
| 39 |
for chunk in chunks:
|