Hamda committed on
Commit
407249a
1 Parent(s): 538d7ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -3
app.py CHANGED
@@ -4,6 +4,7 @@ from transformers import pipeline
4
  from transformers import AutoTokenizer, AutoModelForMaskedLM
5
  import pandas as pd
6
  import numpy as np
 
7
 
8
 
9
  tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", max_length=128, padding=True, pad_to_max_length = True, truncation=True)
@@ -15,9 +16,13 @@ def next_word(text, pipe):
15
  'Word':[],
16
  'Score':[],
17
  }
 
 
 
18
  for e in pipe(text):
19
- res_dict['Word'].append(e['token_str'])
20
- res_dict['Score'].append(e['score'])
 
21
  return res_dict
22
 
23
  st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات والتعبيرات الاصطلاحية وتصحيحها")
@@ -29,7 +34,7 @@ st.checkbox('استعمال الرسم البياني', value=False)
29
 
30
  text_st = sent+ ' <mask>'
31
 
32
- pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model)
33
  dict_next_words = next_word(text_st, pipe)
34
  df = pd.DataFrame.from_dict(dict_next_words)
35
  df.reset_index(drop=True, inplace=True)
 
4
  from transformers import AutoTokenizer, AutoModelForMaskedLM
5
  import pandas as pd
6
  import numpy as np
7
+ import string
8
 
9
 
10
  tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", max_length=128, padding=True, pad_to_max_length = True, truncation=True)
 
16
  'Word':[],
17
  'Score':[],
18
  }
19
+
20
+
21
+
22
  for e in pipe(text):
23
+ if e['token_str'] not in string.punctuation:
24
+ res_dict['Word'].append(e['token_str'])
25
+ res_dict['Score'].append(e['score'])
26
  return res_dict
27
 
28
  st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات والتعبيرات الاصطلاحية وتصحيحها")
 
34
 
35
  text_st = sent+ ' <mask>'
36
 
37
+ pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=10)
38
  dict_next_words = next_word(text_st, pipe)
39
  df = pd.DataFrame.from_dict(dict_next_words)
40
  df.reset_index(drop=True, inplace=True)