RHM-text-summarizer-light

Paused

App Files Files Community

ar08 committed on Jun 23

Commit

17947a0

verified ·

1 Parent(s): 154213a

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -21

app.py CHANGED Viewed

@@ -1,44 +1,53 @@
 # Required: pip install gradio transformers accelerate optimum onnxruntime onnx
 import gradio as gr
 from transformers import AutoTokenizer
 from optimum.onnxruntime import ORTModelForSeq2SeqLM
 from optimum.pipelines import pipeline
-# Load ONNX model and tokenizer
 model_name = "Rahmat82/t5-small-finetuned-summarization-xsum"
-model = ORTModelForSeq2SeqLM.from_pretrained(model_name, export=True)
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
-# Create summarizer pipeline
-summarizer = pipeline("summarization", model=model, tokenizer=tokenizer, device_map="auto", batch_size=12)
-# Summarization function with max input tokens and medium summary length
 def summarize_text(text):
-    if not text.strip():
         return "Please enter some text."
-    # Tokenize and truncate to 1024 tokens
-    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
-    input_text = tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)
-    # Generate medium-length summary
-    result = summarizer(
-        input_text,
-        min_length=100,   # 👈 medium minimum length
-        max_length=120,  # 👈 medium maximum length
         do_sample=False
     )
-    return result[0]["summary_text"]
-# Gradio app
 app = gr.Interface(
     fn=summarize_text,
-    inputs=gr.Textbox(lines=15, placeholder="Paste your text here...", label="Input Text"),
     outputs=gr.Textbox(label="Summary"),
-    title="🚀 ONNX-Powered T5 Summarizer (Medium Summary)",
-    description="Summarize long text into a medium-length summary using an ONNX-accelerated T5-small model (max input: 1024 tokens)"
 )
-# Launch
-app.launch()

 # Required: pip install gradio transformers accelerate optimum onnxruntime onnx
 import gradio as gr
+import torch
 from transformers import AutoTokenizer
 from optimum.onnxruntime import ORTModelForSeq2SeqLM
 from optimum.pipelines import pipeline
+# Load ONNX-optimized model and tokenizer
 model_name = "Rahmat82/t5-small-finetuned-summarization-xsum"
+model = ORTModelForSeq2SeqLM.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+# Build a fast summarization pipeline
+device = 0 if torch.cuda.is_available() else -1
+summarizer = pipeline(
+    task="summarization",
+    model=model,
+    tokenizer=tokenizer,
+    device=device,
+    batch_size=16,  # increased batch size for higher throughput
+)
+# Speed-optimized summarization function
 def summarize_text(text):
+    text = text.strip()
+    if not text:
         return "Please enter some text."
+    # Encode with truncation (max_length=1024)
+    inputs = tokenizer.encode(text, max_length=1024, truncation=True, return_tensors="pt")
+    decoded_input = tokenizer.decode(inputs[0], skip_special_tokens=True)
+    # Generate summary with tighter bounds
+    summary = summarizer(
+        decoded_input,
+        min_length=69,   # lower min length for faster generation
+        max_length=120,
         do_sample=False
     )
+    return summary[0]["summary_text"]
+# Gradio interface
 app = gr.Interface(
     fn=summarize_text,
+    inputs=gr.Textbox(lines=12, placeholder="Paste long text here...", label="Input Text"),
     outputs=gr.Textbox(label="Summary"),
+    title="⚡ Fast ONNX T5 Summarizer",
+    description="ONNX-accelerated T5-small model for quick, medium-length summarization (up to 1,024 tokens)."
 )
+if __name__ == "__main__":
+    app.launch()