Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,22 +4,22 @@ from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
|
|
| 4 |
import pandas as pd
|
| 5 |
import pdfplumber
|
| 6 |
|
| 7 |
-
#
|
| 8 |
tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
|
| 9 |
-
retriever = RagRetriever.from_pretrained("facebook/wiki_dpr", use_dummy_dataset=True)
|
| 10 |
model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq", retriever=retriever)
|
| 11 |
|
| 12 |
-
#
|
| 13 |
def extract_text_from_pdf(pdf_file):
|
|
|
|
| 14 |
with pdfplumber.open(pdf_file) as pdf:
|
| 15 |
-
text = ""
|
| 16 |
for page in pdf.pages:
|
| 17 |
page_text = page.extract_text()
|
| 18 |
if page_text:
|
| 19 |
text += page_text + "\n"
|
| 20 |
return text.strip()
|
| 21 |
|
| 22 |
-
# Streamlit
|
| 23 |
st.title("RAG-Powered PDF & CSV Chatbot")
|
| 24 |
|
| 25 |
# CSV file upload
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
import pdfplumber
|
| 6 |
|
| 7 |
+
# Load the RAG model and tokenizer
|
| 8 |
tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
|
| 9 |
+
retriever = RagRetriever.from_pretrained("facebook/wiki_dpr", use_dummy_dataset=True)
|
| 10 |
model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq", retriever=retriever)
|
| 11 |
|
| 12 |
+
# Function to extract text from a PDF file
|
| 13 |
def extract_text_from_pdf(pdf_file):
|
| 14 |
+
text = ""
|
| 15 |
with pdfplumber.open(pdf_file) as pdf:
|
|
|
|
| 16 |
for page in pdf.pages:
|
| 17 |
page_text = page.extract_text()
|
| 18 |
if page_text:
|
| 19 |
text += page_text + "\n"
|
| 20 |
return text.strip()
|
| 21 |
|
| 22 |
+
# Streamlit app
|
| 23 |
st.title("RAG-Powered PDF & CSV Chatbot")
|
| 24 |
|
| 25 |
# CSV file upload
|