LightOnOCR

Paused

App Files Files Community

IFMedTechdemo committed 24 days ago

Commit

1382c6e

verified ·

1 Parent(s): c2a331b

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -51

app.py CHANGED Viewed

@@ -2,7 +2,6 @@
 import subprocess
 import sys
-import threading
 import spaces
 import torch
@@ -14,7 +13,6 @@ import pypdfium2 as pdfium
 from transformers import (
     LightOnOCRForConditionalGeneration,
     LightOnOCRProcessor,
-    TextIteratorStreamer,
 )
 from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
@@ -108,7 +106,7 @@ def clean_output_text(text):
 @spaces.GPU
-def extract_text_from_image(image, temperature=0.2, stream=False):
     """Extract text from image using LightOnOCR model, and run clinical NER."""
     # Prepare the chat format
     chat = [
@@ -149,55 +147,35 @@ def extract_text_from_image(image, temperature=0.2, stream=False):
         do_sample=temperature > 0,
     )
-    if stream:
-        # Streaming generation
-        streamer = TextIteratorStreamer(
-            processor.tokenizer,
-            skip_prompt=True,
-            skip_special_tokens=True,
-        )
-        generation_kwargs["streamer"] = streamer
-        thread = threading.Thread(target=ocr_model.generate, kwargs=generation_kwargs)
-        thread.start()
-        full_text = ""
-        for new_text in streamer:
-            full_text += new_text
-            cleaned_text = clean_output_text(full_text)
-            # For streaming, we’ll only show text progressively,
-            # and keep medications empty (or compute at the end if you prefer).
-            yield cleaned_text, ""
-        thread.join()
-    else:
-        # Non-streaming generation
-        with torch.no_grad():
-            outputs = ocr_model.generate(**generation_kwargs)
-        output_text = processor.decode(outputs[0], skip_special_tokens=True)
-        cleaned_text = clean_output_text(output_text)
-        # Clinical NER on the full cleaned text
-        entities = ner_pipeline(cleaned_text)
-        medications = []
-        for ent in entities:
-            if ent["entity_group"] == "treatment":
-                word = ent["word"]
-                if word.startswith("##") and medications:
-                    medications[-1] += word[2:]
-                else:
-                    medications.append(word)
-        medications_str = ", ".join(set(medications)) if medications else "None detected"
-        yield cleaned_text, medications_str
-def process_input(file_input, temperature, page_num, enable_streaming):
     """Process uploaded file (image or PDF) and extract text with optional streaming."""
     if file_input is None:
         # 6 outputs: [output_text, medications_output, raw_output, page_info, rendered_image, num_pages]
@@ -233,7 +211,7 @@ def process_input(file_input, temperature, page_num, enable_streaming):
     try:
         # Extract text using LightOnOCR with optional streaming
         for extracted_text, medications in extract_text_from_image(
-            image_to_process, temperature, stream=enable_streaming
         ):
             raw_md = extracted_text  # or you can keep a different raw version
             # 6 outputs: markdown_text, medications, raw_output, page_info, image, slider
@@ -318,12 +296,6 @@ with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft(
                 label="Temperature",
                 info="0.0 = deterministic, Higher = more varied"
             )
-            enable_streaming = gr.Checkbox(
-                label="Enable Streaming",
-                value=True,
-                info="Show text progressively as it's generated"
-            )
-            submit_btn = gr.Button("Extract Text", variant="primary")
             clear_btn = gr.Button("Clear", variant="secondary")
         with gr.Column(scale=2):
@@ -353,7 +325,7 @@ with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft(
     # Event handlers
     submit_btn.click(
     fn=process_input,
-    inputs=[file_input, temperature, num_pages, enable_streaming],
     outputs=[output_text, medications_output, raw_output, page_info, rendered_image, num_pages]
 )

 import subprocess
 import sys
 import spaces
 import torch
 from transformers import (
     LightOnOCRForConditionalGeneration,
     LightOnOCRProcessor,
 )
 from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
 @spaces.GPU
+def extract_text_from_image(image, temperature=0.2):
     """Extract text from image using LightOnOCR model, and run clinical NER."""
     # Prepare the chat format
     chat = [
         do_sample=temperature > 0,
     )
+    # Non-streaming generation
+    with torch.no_grad():
+        outputs = ocr_model.generate(**generation_kwargs)
+    output_text = processor.decode(outputs[0], skip_special_tokens=True)
+    cleaned_text = clean_output_text(output_text)
+    print("\n this is cleaned_text",cleaned_text )
+    # Clinical NER on the full cleaned text
+    entities = ner_pipeline(cleaned_text)
+    print("\n this is entity",entities)
+    medications = []
+    for ent in entities:
+        if ent["entity_group"] == "treatment":
+            word = ent["word"]
+            if word.startswith("##") and medications:
+                medications[-1] += word[2:]
+            else:
+                medications.append(word)
+    medications_str = ", ".join(set(medications)) if medications else "None detected"
+    yield cleaned_text, medications_str
+def process_input(file_input, temperature, page_num):
     """Process uploaded file (image or PDF) and extract text with optional streaming."""
     if file_input is None:
         # 6 outputs: [output_text, medications_output, raw_output, page_info, rendered_image, num_pages]
     try:
         # Extract text using LightOnOCR (non-streaming; yields one final result)
         for extracted_text, medications in extract_text_from_image(
+            image_to_process, temperature
         ):
             raw_md = extracted_text  # or you can keep a different raw version
             # 6 outputs: markdown_text, medications, raw_output, page_info, image, slider
                 label="Temperature",
                 info="0.0 = deterministic, Higher = more varied"
             )
             clear_btn = gr.Button("Clear", variant="secondary")
         with gr.Column(scale=2):
     # Event handlers
     submit_btn.click(
     fn=process_input,
+    inputs=[file_input, temperature, num_pages, ],
     outputs=[output_text, medications_output, raw_output, page_info, rendered_image, num_pages]
 )