Spaces:

blazingbunny
/

BERT-Extractive-Summarizer

Sleeping

blazingbunny committed on Aug 26, 2024

Commit

f1e7200

verified ·

1 Parent(s): b42b34a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,10 +1,10 @@
 import streamlit as st
 from transformers import pipeline
 import re
-import nltk
-nltk.download('punkt')
-from nltk.tokenize import sent_tokenize
 st.title('Hugging Face BERT Summarizer')
@@ -29,8 +29,8 @@ if uploaded_file is not None and keywords:
     user_input = uploaded_file.read().decode('utf-8')
     keywords = [keyword.strip() for keyword in keywords.split(",")]
-    # Split text into sentences
-    sentences = sent_tokenize(user_input)
     # Filter sentences based on keywords
     filtered_sentences = [sentence for sentence in sentences if any(keyword.lower() in sentence.lower() for keyword in keywords)]

 import streamlit as st
 from transformers import pipeline
 import re
+def custom_sentence_splitter(text):  # takes a str, returns the list of pieces produced by re.split
+    # Split after '.', '!' or '?' when followed by one or more spaces; the lookbehind keeps the punctuation attached to its sentence.
+    return re.split(r'(?<=[.!?]) +', text)  # only literal spaces separate sentences; a newline or tab after the punctuation does not split
 st.title('Hugging Face BERT Summarizer')
     user_input = uploaded_file.read().decode('utf-8')
     keywords = [keyword.strip() for keyword in keywords.split(",")]
+    # Split text into sentences using the custom function
+    sentences = custom_sentence_splitter(user_input)
     # Filter sentences based on keywords
     filtered_sentences = [sentence for sentence in sentences if any(keyword.lower() in sentence.lower() for keyword in keywords)]