File size: 1,236 Bytes
ea90e06
e683309
ea90e06
 
a09216c
28b7f46
a09216c
ea90e06
 
c59a4f8
ea90e06
fa1c92c
ea90e06
 
454fde7
 
ea90e06
ccc474b
454fde7
 
ea90e06
 
67ec166
dee9089
e7bd68e
67ec166
0af80fa
67ec166
0af80fa
30ba48b
ea90e06
caa1fd8
e7bd68e
a09216c
dee9089
c871b03
e7bd68e
ccc474b
7078b67
cd7dcf3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import streamlit as st
import transformers
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForMaskedLM
import pandas as pd
import numpy as np


def _cache_resource(loader):
    """Wrap *loader* with whichever Streamlit resource cache is available.

    Newer Streamlit releases expose ``st.cache_resource``; older ones only
    provide ``st.cache``. Falling back keeps the script runnable on both.
    """
    modern_cache = getattr(st, "cache_resource", None)
    if modern_cache is not None:
        return modern_cache(loader)
    # The legacy cache hashes return values unless mutation is allowed,
    # which is not workable for large model objects.
    return st.cache(allow_output_mutation=True)(loader)


@_cache_resource
def _load_arabart():
    """Load the AraBART tokenizer and masked-LM model once per process.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the weights would be reloaded on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # NOTE(review): padding/truncation/max_length are normally call-time
    # tokenizer arguments; they are kept here unchanged -- confirm they
    # have the intended effect when passed to from_pretrained.
    loaded_tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", padding= True, truncation=True, max_length=128)
    loaded_model = AutoModelForMaskedLM.from_pretrained("moussaKam/AraBART")
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_arabart()

#@st.cache
def next_word(text, pipe):
    """Collect the predictions of *pipe* for *text* as two parallel lists.

    Args:
        text: Input passed unchanged to ``pipe``.
        pipe: Callable returning an iterable of mappings, each providing
            the keys ``'token_str'`` and ``'score'``.

    Returns:
        A dict with keys ``'Word'`` and ``'Score'``, each a list holding
        the corresponding value from every prediction, in the order the
        predictions were produced.
    """
    # Read both fields per prediction in a single pass so that results are
    # consumed exactly once and in order.
    candidates = [(item['token_str'], item['score']) for item in pipe(text)]
    return {
        'Word': [word for word, _ in candidates],
        'Score': [score for _, score in candidates],
    }

# --- Streamlit page: predict the word that follows the user's sentence ---
st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات والتعبيرات الاصطلاحية وتصحيحها")
default_value = "بيعت الأسلحة في السوق"
# sent is the variable holding the user's input
# Current Streamlit releases reject text areas shorter than 68 px, so the
# original height of 20 would raise instead of rendering.
sent = st.text_area("مدخل", default_value, height=68)

# Keep the checkbox state; previously the return value was discarded and
# the "use chart" option had no effect.
show_chart = st.checkbox('استعمال الرسم البياني', value=False)

# Append the mask token so the model predicts the word after the input.
text_st = sent + ' <mask>'

pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model)
dict_next_words = next_word(text_st, pipe)
df = pd.DataFrame.from_dict(dict_next_words)
df.reset_index(drop=True, inplace=True)

st.dataframe(df)

if show_chart:
    # One bar per candidate word, with its score as the bar height.
    st.bar_chart(df.set_index('Word')['Score'])


#st.table(df)