File size: 1,417 Bytes
ea90e06 e683309 ea90e06 a09216c 28b7f46 407249a a09216c 7a14486 538d7ca 7a14486 ea90e06 fa1c92c ea90e06 4bb560b ccc474b 4bb560b 407249a ea90e06 67ec166 dee9089 e7bd68e 67ec166 0af80fa 67ec166 0af80fa 30ba48b ea90e06 7a14486 e7bd68e a09216c dee9089 c871b03 e7bd68e ccc474b 7078b67 cd7dcf3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import string
import unicodedata

import numpy as np
import pandas as pd
import streamlit as st
import torch
import transformers
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForMaskedLM
# Use the GPU when one is present, otherwise fall back to the CPU so that
# moving the model below does not fail on CPU-only hosts.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# NOTE(review): max_length / padding / pad_to_max_length / truncation are
# passed at load time here; they are normally call-time tokenization options,
# so confirm they have the intended effect.
tokenizer = AutoTokenizer.from_pretrained(
    "moussaKam/AraBART",
    max_length=128,
    padding=True,
    pad_to_max_length=True,
    truncation=True,
)

# Fine-tuned masked-LM checkpoint, moved onto the selected device.
model = AutoModelForMaskedLM.from_pretrained("Hamda/test-1-finetuned-AraBART").to(device)
#@st.cache
# ASCII punctuation/symbol characters, built once so that every membership
# test is a constant-time set lookup instead of rebuilding a list per character.
_ASCII_PUNCTUATION = frozenset(string.punctuation)


def _is_punctuation(char):
    """Return True if *char* is ASCII punctuation or Unicode punctuation."""
    # Unicode general categories starting with "P" cover non-ASCII marks such
    # as the Arabic comma, semicolon and question mark, which
    # string.punctuation does not contain.
    return char in _ASCII_PUNCTUATION or unicodedata.category(char).startswith('P')


def next_word(text, pipe):
    """Collect the candidates proposed by *pipe* for *text*, minus punctuation.

    Args:
        text: Input string handed unchanged to ``pipe``.
        pipe: Callable returning an iterable of mappings that each provide a
            ``'token_str'`` and a ``'score'`` entry.

    Returns:
        A dict with two parallel lists: ``'Word'`` holds the kept token
        strings and ``'Score'`` their scores, in the order ``pipe`` yielded
        them. A candidate is dropped when any character of its token string
        is a punctuation character.
    """
    words = []
    scores = []
    for candidate in pipe(text):
        token = candidate['token_str']
        if any(_is_punctuation(char) for char in token):
            continue
        words.append(token)
        scores.append(candidate['score'])
    return {'Word': words, 'Score': scores}
st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات والتعبيرات الاصطلاحية وتصحيحها")
default_value = "بيعت الأسلحة في السوق"

# sent is the variable holding the user's input
# NOTE(review): recent Streamlit releases appear to reject text_area heights
# below 68 px -- confirm against the deployed Streamlit version.
sent = st.text_area("مدخل", default_value, height=20)

# Keep the checkbox state so the chart option actually has an effect.
show_chart = st.checkbox('استعمال الرسم البياني', value=False)

# Append the mask token so the model predicts the word following the input.
text_st = sent + ' <mask>'
pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=15)
dict_next_words = next_word(text_st, pipe)

# A frame built from plain lists already has a fresh 0..n-1 index, so no
# index reset is needed before display.
df = pd.DataFrame.from_dict(dict_next_words)
st.dataframe(df)

# Optional chart view: one bar per suggested word, bar height = score.
if show_chart and not df.empty:
    st.bar_chart(df.set_index('Word'))