Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ from transformers import pipeline
|
|
4 |
from transformers import AutoTokenizer, AutoModelForMaskedLM
|
5 |
import pandas as pd
|
6 |
import numpy as np
|
|
|
7 |
|
8 |
|
9 |
tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", max_length=128, padding=True, pad_to_max_length = True, truncation=True)
|
@@ -15,9 +16,13 @@ def next_word(text, pipe):
|
|
15 |
'Word':[],
|
16 |
'Score':[],
|
17 |
}
|
|
|
|
|
|
|
18 |
for e in pipe(text):
|
19 |
-
|
20 |
-
|
|
|
21 |
return res_dict
|
22 |
|
23 |
st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات والتعبيرات الاصطلاحية وتصحيحها")
|
@@ -29,7 +34,7 @@ st.checkbox('استعمال الرسم البياني', value=False)
|
|
29 |
|
30 |
text_st = sent+ ' <mask>'
|
31 |
|
32 |
-
pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model)
|
33 |
dict_next_words = next_word(text_st, pipe)
|
34 |
df = pd.DataFrame.from_dict(dict_next_words)
|
35 |
df.reset_index(drop=True, inplace=True)
|
|
|
4 |
from transformers import AutoTokenizer, AutoModelForMaskedLM
|
5 |
import pandas as pd
|
6 |
import numpy as np
|
7 |
+
import string
|
8 |
|
9 |
|
10 |
tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", max_length=128, padding=True, pad_to_max_length = True, truncation=True)
|
|
|
16 |
'Word':[],
|
17 |
'Score':[],
|
18 |
}
|
19 |
+
|
20 |
+
|
21 |
+
|
22 |
for e in pipe(text):
|
23 |
+
if e['token_str'] not in string.punctuation:
|
24 |
+
res_dict['Word'].append(e['token_str'])
|
25 |
+
res_dict['Score'].append(e['score'])
|
26 |
return res_dict
|
27 |
|
28 |
st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات والتعبيرات الاصطلاحية وتصحيحها")
|
|
|
34 |
|
35 |
text_st = sent+ ' <mask>'
|
36 |
|
37 |
+
pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=10)
|
38 |
dict_next_words = next_word(text_st, pipe)
|
39 |
df = pd.DataFrame.from_dict(dict_next_words)
|
40 |
df.reset_index(drop=True, inplace=True)
|