Spaces:
Runtime error
Runtime error
add zh
Browse files
app.py
CHANGED
|
@@ -25,15 +25,19 @@ def download_file(filename):
|
|
| 25 |
|
| 26 |
NLTK = nltk_load(download_file('english.pickle'))
|
| 27 |
sent_cut_en = NLTK.tokenize
|
| 28 |
-
LR_GLTR_EN, LR_PPL_EN = [
|
| 29 |
pickle.load(open(download_file(f'{lang}-gpt2-{name}.pkl'), 'rb'))
|
| 30 |
-
for lang, name in [('en', 'gltr'), ('en', 'ppl')]
|
| 31 |
]
|
| 32 |
|
| 33 |
NAME_EN = 'gpt2'
|
| 34 |
TOKENIZER_EN = GPT2Tokenizer.from_pretrained(NAME_EN)
|
| 35 |
MODEL_EN = GPT2LMHeadModel.from_pretrained(NAME_EN)
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
# code borrowed from https://github.com/blmoistawinde/HarvestText
|
| 39 |
def sent_cut_zh(para: str) -> List[str]:
|
|
@@ -143,7 +147,7 @@ def predict_en(text: str) -> List:
|
|
| 143 |
def predict_zh(text: str) -> List:
|
| 144 |
with torch.no_grad():
|
| 145 |
feat = gpt2_features(text, TOKENIZER_ZH, MODEL_ZH, sent_cut_zh)
|
| 146 |
-
out = lr_predict(*feat,
|
| 147 |
return out
|
| 148 |
|
| 149 |
|
|
@@ -208,10 +212,10 @@ with gr.Blocks() as demo:
|
|
| 208 |
value="对于OpenAI大力出奇迹的工作,自然每个人都有自己的看点。我自己最欣赏的地方是ChatGPT如何解决 “AI校正(Alignment)“这个问题。这个问题也是我们课题组这两年在探索的学术问题之一。"
|
| 209 |
)
|
| 210 |
button2 = gr.Button("🤖 预测!")
|
| 211 |
-
gr.Markdown("GLTR")
|
| 212 |
label2_gltr = gr.Textbox(lines=1, label='预测结果 🎃')
|
| 213 |
score2_gltr = gr.Textbox(lines=1, label='模型概率')
|
| 214 |
-
gr.Markdown("PPL")
|
| 215 |
label2_ppl = gr.Textbox(lines=1, label='PPL 预测结果 🎃')
|
| 216 |
score2_ppl = gr.Textbox(lines=1, label='PPL 模型概率')
|
| 217 |
|
|
|
|
| 25 |
|
| 26 |
NLTK = nltk_load(download_file('english.pickle'))
|
| 27 |
sent_cut_en = NLTK.tokenize
|
| 28 |
+
def _load_pickle(path):
    """Unpickle and return the object stored at *path*.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the object has been read, instead of being left to the garbage
    collector.
    """
    # NOTE(review): pickle.load runs code embedded in the file. This is only
    # safe if whatever download_file() fetches is trusted - confirm.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# One pickled classifier per (language, feature set) pair, fetched through
# download_file() as '<lang>-gpt2-<name>.pkl'. The unpacking order matches the
# order of the pairs below.
LR_GLTR_EN, LR_PPL_EN, LR_GLTR_ZH, LR_PPL_ZH = [
    _load_pickle(download_file(f'{lang}-gpt2-{name}.pkl'))
    for lang, name in [('en', 'gltr'), ('en', 'ppl'), ('zh', 'gltr'), ('zh', 'ppl')]
]
|
| 32 |
|
| 33 |
# Checkpoint identifier used for the English tokenizer and language model.
NAME_EN = 'gpt2'
# Both objects come from the same checkpoint; the tokenizer is loaded first,
# then the LM head model.
TOKENIZER_EN, MODEL_EN = [
    loader.from_pretrained(NAME_EN)
    for loader in (GPT2Tokenizer, GPT2LMHeadModel)
]
|
| 36 |
|
| 37 |
+
# Checkpoint identifier used for the Chinese tokenizer and language model.
NAME_ZH = 'IDEA-CCNL/Wenzhong-GPT2-110M'
# Both objects come from the same checkpoint; the tokenizer is loaded first,
# then the LM head model.
TOKENIZER_ZH, MODEL_ZH = [
    loader.from_pretrained(NAME_ZH)
    for loader in (GPT2Tokenizer, GPT2LMHeadModel)
]
|
| 40 |
+
|
| 41 |
|
| 42 |
# code borrowed from https://github.com/blmoistawinde/HarvestText
|
| 43 |
def sent_cut_zh(para: str) -> List[str]:
|
|
|
|
| 147 |
def predict_zh(text: str) -> List:
    """Run the Chinese detection pipeline on *text*.

    Features are computed by ``gpt2_features`` using the Chinese tokenizer,
    the Chinese language model and the ``sent_cut_zh`` sentence splitter.
    They are then passed, together with the two Chinese classifiers and the
    label pair ``['人类', 'ChatGPT']``, to ``lr_predict``.

    Returns:
        Whatever ``lr_predict`` returns for those inputs.
    """
    # Feature extraction runs with gradient tracking disabled.
    with torch.no_grad():
        features = gpt2_features(text, TOKENIZER_ZH, MODEL_ZH, sent_cut_zh)
    # NOTE(review): assumes the classifier step sits outside the no_grad
    # block - confirm against the indentation in app.py.
    return lr_predict(*features, LR_GLTR_ZH, LR_PPL_ZH, ['人类', 'ChatGPT'])
|
| 152 |
|
| 153 |
|
|
|
|
| 212 |
value="对于OpenAI大力出奇迹的工作,自然每个人都有自己的看点。我自己最欣赏的地方是ChatGPT如何解决 “AI校正(Alignment)“这个问题。这个问题也是我们课题组这两年在探索的学术问题之一。"
|
| 213 |
)
|
| 214 |
button2 = gr.Button("🤖 预测!")
|
| 215 |
+
gr.Markdown("GLTR (中文测试集准确率 86.39%)")
|
| 216 |
label2_gltr = gr.Textbox(lines=1, label='预测结果 🎃')
|
| 217 |
score2_gltr = gr.Textbox(lines=1, label='模型概率')
|
| 218 |
+
gr.Markdown("PPL (中文测试集准确率 59.04%, 持续优化中...)")
|
| 219 |
label2_ppl = gr.Textbox(lines=1, label='PPL 预测结果 🎃')
|
| 220 |
score2_ppl = gr.Textbox(lines=1, label='PPL 模型概率')
|
| 221 |
|