Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# pip install gradio transformers optimum onnxruntime onnx beautifulsoup4 langdetect
|
| 2 |
|
| 3 |
import gradio as gr
|
| 4 |
import requests
|
|
@@ -6,7 +6,7 @@ from bs4 import BeautifulSoup
|
|
| 6 |
import re
|
| 7 |
from requests.sessions import Session
|
| 8 |
from langdetect import detect
|
| 9 |
-
from
|
| 10 |
|
| 11 |
from transformers import AutoTokenizer
|
| 12 |
from optimum.onnxruntime import ORTModelForSeq2SeqLM
|
|
@@ -62,15 +62,15 @@ def scrape_visible_text_from_url(url, query_selector=None, email=None, password=
|
|
| 62 |
|
| 63 |
visible_text = re.sub(r'\s+', ' ', text_content).strip()
|
| 64 |
|
| 65 |
-
translator =
|
| 66 |
sentences = re.split(r'(?<=[.!?]) +', visible_text)
|
| 67 |
translated_sentences = []
|
| 68 |
for sentence in sentences:
|
| 69 |
try:
|
| 70 |
lang = detect(sentence)
|
| 71 |
if lang != 'en':
|
| 72 |
-
|
| 73 |
-
translated_sentences.append(
|
| 74 |
else:
|
| 75 |
translated_sentences.append(sentence)
|
| 76 |
except Exception:
|
|
|
|
| 1 |
+
# pip install gradio transformers optimum onnxruntime onnx beautifulsoup4 langdetect deep-translator requests torch
|
| 2 |
|
| 3 |
import gradio as gr
|
| 4 |
import requests
|
|
|
|
| 6 |
import re
|
| 7 |
from requests.sessions import Session
|
| 8 |
from langdetect import detect
|
| 9 |
+
from deep_translator import GoogleTranslator
|
| 10 |
|
| 11 |
from transformers import AutoTokenizer
|
| 12 |
from optimum.onnxruntime import ORTModelForSeq2SeqLM
|
|
|
|
| 62 |
|
| 63 |
visible_text = re.sub(r'\s+', ' ', text_content).strip()
|
| 64 |
|
| 65 |
+
translator = GoogleTranslator(source='auto', target='en')
|
| 66 |
sentences = re.split(r'(?<=[.!?]) +', visible_text)
|
| 67 |
translated_sentences = []
|
| 68 |
for sentence in sentences:
|
| 69 |
try:
|
| 70 |
lang = detect(sentence)
|
| 71 |
if lang != 'en':
|
| 72 |
+
translated_sentence = translator.translate(sentence)
|
| 73 |
+
translated_sentences.append(translated_sentence)
|
| 74 |
else:
|
| 75 |
translated_sentences.append(sentence)
|
| 76 |
except Exception:
|