Spaces:
Runtime error
Runtime error
| import nltk | |
| nltk.download("punkt") | |
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM | |
| from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext | |
| from llama_index.llms import HuggingFaceLLM | |
| from llama_index.embeddings.huggingface import HuggingFaceEmbedding | |
| import os | |
# Module-level setup: everything below runs once, at import time, and
# produces the globals used by the request handlers further down
# (deepseek_tokenizer, deepseek_model, indicbart_tokenizer, indicbart_model,
# index).

# Load DeepSeek-R1
deepseek_tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1", trust_remote_code=True)
deepseek_model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1", trust_remote_code=True)

# Load IndicBART
indicbart_tokenizer = AutoTokenizer.from_pretrained("ai4bharat/IndicBART")
indicbart_model = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/IndicBART")

# Initialize LlamaIndex components
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Hand the already-loaded DeepSeek-R1 objects to LlamaIndex instead of letting
# HuggingFaceLLM load the same checkpoint a second time from its name: that
# would hold two copies of the weights in memory and would load the second
# copy without trust_remote_code. The name arguments are kept so the wrapper
# still reports which checkpoint it is backed by.
llm = HuggingFaceLLM(
    model_name="deepseek-ai/DeepSeek-R1",
    tokenizer_name="deepseek-ai/DeepSeek-R1",
    model=deepseek_model,
    tokenizer=deepseek_tokenizer,
    max_new_tokens=512,
    context_window=4096,
)

# NOTE(review): ServiceContext is deprecated in newer llama-index releases in
# favour of llama_index.core.Settings -- confirm against the pinned version.
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

# Build index from documents in 'data' directory
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
| # Define functions for each task | |
def restore_text(input_text, task_type):
    """Run the selected IndicBART task on ``input_text`` and return the result.

    Args:
        input_text: Text to process. ``None``, empty, or whitespace-only
            input short-circuits to an empty string without calling the model.
        task_type: One of the task labels offered by the UI. Any other value
            (including ``None`` when nothing is selected) falls back to the
            "restore: " prefix.

    Returns:
        The first decoded sequence produced by the model, or ``""`` when there
        is no input to process.
    """
    # Nothing to work on: skip tokenisation and beam search, which would
    # otherwise generate text from the bare task prefix alone.
    if not input_text or not input_text.strip():
        return ""

    # NOTE(review): the task is selected purely by prepending a text prefix;
    # whether the IndicBART checkpoint was trained to respond to these
    # prefixes is not visible from this file -- confirm.
    prefix_map = {
        "Restore & Correct Tamil Text": "restore: ",
        "Summarize in Tamil": "summarize: ",
        "Translate to English": "translate Tamil to English: ",
    }
    prefix = prefix_map.get(task_type, "restore: ")

    inputs = indicbart_tokenizer([prefix + input_text], return_tensors="pt", padding=True)
    outputs = indicbart_model.generate(**inputs, max_length=256, num_beams=4, early_stopping=True)
    decoded_output = indicbart_tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return decoded_output[0]
def deepseek_chat(message):
    """Generate a DeepSeek-R1 reply to ``message``.

    Args:
        message: The user's text. ``None``, empty, or whitespace-only input
            short-circuits to an empty string without calling the model.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        not repeated in the result), or ``""`` when there is no message.
    """
    if not message or not message.strip():
        return ""

    encoded = deepseek_tokenizer(message + deepseek_tokenizer.eos_token, return_tensors="pt")
    input_ids = encoded["input_ids"]
    prompt_length = input_ids.shape[-1]

    outputs = deepseek_model.generate(
        input_ids=input_ids,
        # Passed explicitly because pad_token_id is set to the EOS id below,
        # so the mask cannot be inferred reliably from the ids alone.
        attention_mask=encoded.get("attention_mask"),
        max_length=1024,
        pad_token_id=deepseek_tokenizer.eos_token_id,
    )

    # For a causal LM the returned sequence starts with the prompt tokens;
    # drop them so the user's own message is not echoed back as the response.
    reply_ids = outputs[0][prompt_length:]
    return deepseek_tokenizer.decode(reply_ids, skip_special_tokens=True)
def query_documents(query):
    """Answer ``query`` against the module-level document index.

    Args:
        query: The question to run. ``None``, empty, or whitespace-only input
            short-circuits to an empty string without touching the index.

    Returns:
        The query engine's response converted to ``str``, or ``""`` when
        there is no query.
    """
    # An empty query has nothing to retrieve; avoid the embedding and LLM
    # round trip entirely.
    if not query or not query.strip():
        return ""

    query_engine = index.as_query_engine()
    response = query_engine.query(query)
    return str(response)
| # Gradio Interface | |
# Three-tab UI: each tab has its own input, output and button, and each
# button is wired to exactly one of the handler functions defined above.
task_choices = [
    "Restore & Correct Tamil Text",
    "Summarize in Tamil",
    "Translate to English",
]

with gr.Blocks() as demo:
    gr.Markdown("## 🕉️ Ancient Tamil Literature Expert AI")

    # Tab 1: IndicBART text tasks, selected with the radio group.
    with gr.Tab("IndicBART Tasks"):
        tamil_box = gr.Textbox(
            label="Input Tamil Text",
            lines=8,
            placeholder="Enter ancient Tamil text here...",
        )
        task_radio = gr.Radio(choices=task_choices, label="Select Task")
        result_box = gr.Textbox(label="Output")
        run_button = gr.Button("Submit")
        run_button.click(restore_text, inputs=[tamil_box, task_radio], outputs=result_box)

    # Tab 2: free-form chat with DeepSeek-R1.
    with gr.Tab("DeepSeek-R1 Chat"):
        message_box = gr.Textbox(label="Enter your message")
        reply_box = gr.Textbox(label="DeepSeek-R1 Response")
        send_button = gr.Button("Send")
        send_button.click(deepseek_chat, inputs=message_box, outputs=reply_box)

    # Tab 3: question answering over the indexed documents.
    with gr.Tab("Document Query"):
        question_box = gr.Textbox(label="Enter your query")
        answer_box = gr.Textbox(label="Query Response")
        search_button = gr.Button("Search")
        search_button.click(query_documents, inputs=question_box, outputs=answer_box)

# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()