Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,23 +1,51 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import pandas as pd
|
| 3 |
-
#import torch
|
| 4 |
from transformers import pipeline
|
| 5 |
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
def summary():
    """Render the "PATENT SUMMARIZER" page and summarize pasted text on demand.

    Draws an HTML banner, a text area for the document, and a "Summarize"
    button. When the button is pressed with non-blank text, the text is
    passed to ``summarizer`` and the resulting summary text is shown in a
    success box.

    NOTE(review): ``summarizer`` is read from module scope but is not defined
    in the visible part of this file -- presumably a ``transformers``
    summarization pipeline; confirm it is created before this is called.
    """
    # The banner is raw HTML, so the lines stay unindented to keep Markdown
    # from treating them as an indented code block.
    html_temp = """
<div style ="background-color:cyan;padding:13px">
<h1 style ="color:black;text-align:center;">PATENT SUMMARIZER</h1>
</div>
"""
    st.markdown(html_temp, unsafe_allow_html=True)
    my_file = st.text_area("Paste Document content", placeholder="Paste here", height=60)

    if st.button("Summarize"):
        # Do not run the model on an empty or whitespace-only text area.
        if not my_file.strip():
            st.warning("Please paste some text to summarize.")
            return
        result = summarizer(my_file)
        # A summarization pipeline returns a list of {'summary_text': ...}
        # dicts; show the text itself rather than the raw list.
        st.success(' '.join(item['summary_text'] for item in result))
|
| 21 |
-
|
| 22 |
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
|
|
|
| 2 |
from transformers import pipeline
|
| 3 |
|
| 4 |
+
# NOTE(review): newer Streamlit releases replace ``st.cache`` with
# ``st.cache_resource`` for objects such as models -- confirm against the
# Streamlit version this app is pinned to before switching.
@st.cache(allow_output_mutation=True)
def load_summarizer():
    """Build the default ``transformers`` summarization pipeline.

    The pipeline is placed on the first CUDA GPU when one is available and
    on the CPU otherwise, so the app also starts on hosts without a GPU.

    Returns:
        The object returned by ``pipeline("summarization", ...)``.
    """
    try:
        import torch
    except ImportError:
        # No PyTorch available to probe for a GPU: use the CPU.
        device = -1
    else:
        # For ``pipeline``, 0 is the first CUDA device and -1 means CPU.
        device = 0 if torch.cuda.is_available() else -1
    return pipeline("summarization", device=device)
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
def generate_chunks(inp_str, max_chunk=500):
    """Split text into chunks of whole sentences, each at most ``max_chunk`` words.

    Sentences are delimited by ``.``, ``?`` or ``!`` followed by whitespace,
    so tokens such as ``3.14`` are left intact. Sentences are packed greedily
    into the current chunk; a new chunk is started when adding the next
    sentence would exceed ``max_chunk`` words. Whitespace inside a chunk is
    normalized to single spaces.

    A single sentence longer than ``max_chunk`` words is not broken up; it
    becomes one over-long chunk of its own.

    Args:
        inp_str: The text to split. Empty or whitespace-only input is allowed.
        max_chunk: Maximum number of whitespace-separated words per chunk.

    Returns:
        A list of chunk strings, in input order. Empty when the input
        contains no words.
    """
    import re

    if not inp_str:
        return []

    # Split after sentence-ending punctuation, consuming the whitespace that
    # follows it, without needing a sentinel token inside the text.
    sentences = re.split(r'(?<=[.?!])\s+', inp_str)

    chunks = []
    current_words = []
    for sentence in sentences:
        words = sentence.split()
        if not words:
            continue
        # Flush the current chunk when this sentence would overflow it.
        if current_words and len(current_words) + len(words) > max_chunk:
            chunks.append(' '.join(current_words))
            current_words = []
        current_words.extend(words)

    if current_words:
        chunks.append(' '.join(current_words))
    return chunks
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# load_summarizer is wrapped in a Streamlit cache decorator, so script reruns
# reuse the already-built pipeline instead of rebuilding it.
summarizer = load_summarizer()

st.title("Summarize Text")
sentence = st.text_area('Please paste your article :', height=30)
button = st.button("Summarize")

# Generation controls in the sidebar. The names avoid shadowing the
# built-in max() and min().
max_length = st.sidebar.slider('Select max', 50, 500, step=10, value=150)
min_length = st.sidebar.slider('Select min', 10, 450, step=10, value=50)
do_sample = st.sidebar.checkbox("Do sample", value=False)

# The two sliders are independent, so the user can select min > max; clamp
# the minimum so the generation call always gets a consistent pair.
min_length = min(min_length, max_length)

# Only show the spinner (and call the model) when there is something to do.
if button and sentence.strip():
    with st.spinner("Generating Summary.."):
        chunks = generate_chunks(sentence)
        res = summarizer(
            chunks,
            max_length=max_length,
            min_length=min_length,
            do_sample=do_sample,
        )
        # One summary per chunk; stitch them together in input order.
        text = ' '.join(summ['summary_text'] for summ in res)
    st.write(text)
|