File size: 1,236 Bytes
ea90e06 e683309 ea90e06 a09216c 28b7f46 a09216c ea90e06 c59a4f8 ea90e06 fa1c92c ea90e06 454fde7 ea90e06 ccc474b 454fde7 ea90e06 67ec166 dee9089 e7bd68e 67ec166 0af80fa 67ec166 0af80fa 30ba48b ea90e06 caa1fd8 e7bd68e a09216c dee9089 c871b03 e7bd68e ccc474b 7078b67 cd7dcf3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import streamlit as st
import transformers
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForMaskedLM
import pandas as pd
import numpy as np
# Hub checkpoint id used for both the tokenizer and the masked-LM weights.
_CHECKPOINT = "moussaKam/AraBART"
# Keyword arguments forwarded to the tokenizer loader.
_TOKENIZER_OPTIONS = {"padding": True, "truncation": True, "max_length": 128}

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT, **_TOKENIZER_OPTIONS)
model = AutoModelForMaskedLM.from_pretrained(_CHECKPOINT)
#@st.cache
def next_word(text, pipe):
    """Collect the predictions produced by ``pipe(text)`` into two columns.

    Args:
        text: Input string, passed unchanged to ``pipe``.
        pipe: Callable returning an iterable of prediction dicts, each
            carrying ``'token_str'`` and ``'score'`` keys. It may also
            return a list of such lists (NOTE(review): presumably a
            transformers fill-mask pipeline, which nests its output when
            the text contains several mask tokens -- confirm).

    Returns:
        A dict with two parallel lists: ``'Word'`` (the ``token_str``
        values) and ``'Score'`` (the ``score`` values), in the order the
        predictions were produced.
    """
    # Materialize once so generators and lists are handled the same way.
    predictions = list(pipe(text))

    # A nested result holds one group of predictions per mask token. Keep
    # the last group: it belongs to the mask furthest into the text, i.e.
    # the "next word" position. Indexing the groups as dicts would raise
    # TypeError.
    if predictions and isinstance(predictions[0], (list, tuple)):
        predictions = list(predictions[-1])

    return {
        'Word': [entry['token_str'] for entry in predictions],
        'Score': [entry['score'] for entry in predictions],
    }
# --- Page layout and prediction flow (runs top to bottom as a script) ---
st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات والتعبيرات الاصطلاحية وتصحيحها")
default_value = "بيعت الأسلحة في السوق"
# sent is the variable holding the user's input
# NOTE(review): recent Streamlit releases appear to reject text_area heights
# below 68px -- confirm against the Streamlit version this app is pinned to.
sent = st.text_area("مدخل", default_value, height=20)
# Keep the checkbox state so that ticking it actually toggles the chart.
show_chart = st.checkbox('استعمال الرسم البياني', value=False)
# Append a mask token so the model predicts the word following the input.
text_st = sent+ ' <mask>'
pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model)
dict_next_words = next_word(text_st, pipe)
# Columns 'Word' and 'Score'; a dict of lists already gets a 0..n-1 index.
df = pd.DataFrame.from_dict(dict_next_words)
st.dataframe(df)
if show_chart:
    # One bar per predicted word, bar height = its score.
    st.bar_chart(df.set_index('Word')['Score'])