|
import string
import unicodedata

import numpy as np
import pandas as pd
import streamlit as st
import torch
import transformers
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForMaskedLM
|
|
|
# Use the GPU when PyTorch can see one and fall back to the CPU otherwise,
# so the app also starts on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Tokenizer of the base AraBART checkpoint.
# NOTE(review): max_length / padding / pad_to_max_length / truncation look
# like call-time tokenizer options; confirm they have the intended effect
# when passed to from_pretrained.
tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", max_length=128, padding=True, pad_to_max_length=True, truncation=True)

# Fine-tuned masked-language model, moved to the selected device.
model = AutoModelForMaskedLM.from_pretrained("Hamda/test-1-finetuned-AraBART").to(device)
|
|
|
|
|
def next_word(text, pipe):
    """Collect the punctuation-free candidates proposed by a fill-mask pipeline.

    Args:
        text: Input handed unchanged to ``pipe``.
        pipe: Callable returning an iterable of mappings, each carrying a
            ``'token_str'`` and a ``'score'`` entry.

    Returns:
        A dict with two parallel lists, ``'Word'`` and ``'Score'``, holding
        the kept candidates in the order ``pipe`` produced them.
    """
    # Built once per call instead of once per inspected character.
    ascii_punct = set(string.punctuation)

    def _is_punct(char):
        # string.punctuation only covers ASCII; the Unicode category check
        # also catches non-ASCII marks such as the Arabic comma or question
        # mark, which would otherwise be listed as "words".
        return char in ascii_punct or unicodedata.category(char).startswith('P')

    res_dict = {
        'Word': [],
        'Score': [],
    }
    for e in pipe(text):
        token = e['token_str']
        # Drop a candidate as soon as any of its characters is punctuation.
        if any(_is_punct(c) for c in token):
            continue
        res_dict['Word'].append(token)
        res_dict['Score'].append(e['score'])
    return res_dict
|
|
|
# Page title (user-facing, Arabic).
st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات والتعبيرات الاصطلاحية وتصحيحها")

# Example sentence pre-filled in the input box.
default_value = "بيعت الأسلحة في السوق"

# Recent Streamlit releases reject text areas shorter than 68 pixels, so the
# minimum height is requested instead of the former 20.
sent = st.text_area("مدخل", default_value, height=68)

# Keep the checkbox state so that the chart option actually has an effect.
show_chart = st.checkbox('استعمال الرسم البياني', value=False)

# Append the mask token so the model predicts the word that follows the input.
text_st = sent + ' <mask>'

# Run the pipeline on the device the model already lives on, so inputs and
# weights end up on the same device; -1 selects the CPU, 0 the first GPU.
pipe_device = 0 if model.device.type == 'cuda' else -1
pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=15, device=pipe_device)

dict_next_words = next_word(text_st, pipe)

# A frame built from the two parallel lists already has a fresh 0..n-1 index.
df = pd.DataFrame.from_dict(dict_next_words)

st.dataframe(df)

# Optional bar chart of the scores, keyed by candidate word.
if show_chart:
    st.bar_chart(df.set_index('Word'))
|
|
|
|
|
|