Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -168,8 +168,8 @@ def extract_text_from_image(image, temperature=0.2, stream=False):
|
|
| 168 |
yield cleaned_text
|
| 169 |
|
| 170 |
|
| 171 |
-
def process_input(file_input, temperature, page_num):
|
| 172 |
-
"""Process uploaded file (image or PDF) and extract text with streaming."""
|
| 173 |
if file_input is None:
|
| 174 |
yield "Please upload an image or PDF first.", "", "", None, gr.update()
|
| 175 |
return
|
|
@@ -197,8 +197,8 @@ def process_input(file_input, temperature, page_num):
|
|
| 197 |
return
|
| 198 |
|
| 199 |
try:
|
| 200 |
-
# Extract text using LightOnOCR with streaming
|
| 201 |
-
for extracted_text in extract_text_from_image(image_to_process, temperature, stream=True):
|
| 202 |
yield extracted_text, extracted_text, page_info, image_to_process, gr.update()
|
| 203 |
|
| 204 |
except Exception as e:
|
|
@@ -233,7 +233,7 @@ with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft(
|
|
| 233 |
**💡 How to use:**
|
| 234 |
1. Upload an image or PDF
|
| 235 |
2. For PDFs: select which page to extract (1-20)
|
| 236 |
-
3. Adjust temperature if needed
|
| 237 |
4. Click "Extract Text"
|
| 238 |
|
| 239 |
**Note:** The Markdown rendering for tables may not always be perfect. Check the raw output for complex tables!
|
|
@@ -277,6 +277,11 @@ with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft(
|
|
| 277 |
label="Temperature",
|
| 278 |
info="0.0 = deterministic, Higher = more varied"
|
| 279 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
submit_btn = gr.Button("Extract Text", variant="primary")
|
| 281 |
clear_btn = gr.Button("Clear", variant="secondary")
|
| 282 |
|
|
@@ -299,7 +304,7 @@ with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft(
|
|
| 299 |
# Event handlers
|
| 300 |
submit_btn.click(
|
| 301 |
fn=process_input,
|
| 302 |
-
inputs=[file_input, temperature, num_pages],
|
| 303 |
outputs=[output_text, raw_output, page_info, rendered_image, num_pages]
|
| 304 |
)
|
| 305 |
|
|
|
|
| 168 |
yield cleaned_text
|
| 169 |
|
| 170 |
|
| 171 |
+
def process_input(file_input, temperature, page_num, enable_streaming):
|
| 172 |
+
"""Process uploaded file (image or PDF) and extract text with optional streaming."""
|
| 173 |
if file_input is None:
|
| 174 |
yield "Please upload an image or PDF first.", "", "", None, gr.update()
|
| 175 |
return
|
|
|
|
| 197 |
return
|
| 198 |
|
| 199 |
try:
|
| 200 |
+
# Extract text using LightOnOCR with optional streaming
|
| 201 |
+
for extracted_text in extract_text_from_image(image_to_process, temperature, stream=enable_streaming):
|
| 202 |
yield extracted_text, extracted_text, page_info, image_to_process, gr.update()
|
| 203 |
|
| 204 |
except Exception as e:
|
|
|
|
| 233 |
**💡 How to use:**
|
| 234 |
1. Upload an image or PDF
|
| 235 |
2. For PDFs: select which page to extract (1-20)
|
| 236 |
+
3. Adjust temperature if needed
|
| 237 |
4. Click "Extract Text"
|
| 238 |
|
| 239 |
**Note:** The Markdown rendering for tables may not always be perfect. Check the raw output for complex tables!
|
|
|
|
| 277 |
label="Temperature",
|
| 278 |
info="0.0 = deterministic, Higher = more varied"
|
| 279 |
)
|
| 280 |
+
enable_streaming = gr.Checkbox(
|
| 281 |
+
label="Enable Streaming",
|
| 282 |
+
value=False,
|
| 283 |
+
info="Show text progressively as it's generated"
|
| 284 |
+
)
|
| 285 |
submit_btn = gr.Button("Extract Text", variant="primary")
|
| 286 |
clear_btn = gr.Button("Clear", variant="secondary")
|
| 287 |
|
|
|
|
| 304 |
# Event handlers
|
| 305 |
submit_btn.click(
|
| 306 |
fn=process_input,
|
| 307 |
+
inputs=[file_input, temperature, num_pages, enable_streaming],
|
| 308 |
outputs=[output_text, raw_output, page_info, rendered_image, num_pages]
|
| 309 |
)
|
| 310 |
|