import streamlit as st

import transformers
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForMaskedLM

# Load the AraBART tokenizer once at startup.
tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", padding=True, truncation=True, max_length=128)

# Cache the model across reruns; allow_output_mutation avoids re-hashing the large model object.
@st.cache(allow_output_mutation=True)
def load_model(model_name):
    model = AutoModelForMaskedLM.from_pretrained(model_name)
    return model

model = load_model("moussaKam/AraBART")
# Run the fill-mask pipeline and collect its top predictions for display.
# The pipeline object is excluded from the cache key because Streamlit cannot hash it.
@st.cache(hash_funcs={transformers.Pipeline: lambda _: None})
def next_word(text, pipe):
    res_dict = {
        'token_str': [],
        'score': [],
    }
    res = pipe(text)
    for e in res:
        # each prediction is a dict holding a single token string and its score
        res_dict['token_str'].append(e['token_str'])
        res_dict['score'].append(e['score'])
    return res_dict
# Streamlit UI: read the query text, append a mask token, and show predictions.
st.title("Predict Next Word")
st.write("Use our model to expand your query based on the DB content")
default_value = "التاريخ هو تحليل و"

sent = st.text_area("Text", default_value, height=60)
sent += ' <mask>'  # append AraBART's mask token so the fill-mask pipeline predicts the next word

pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model)
dict_next_words = next_word(sent, pipe)

st.write(dict_next_words)
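
# To try the app locally (assuming this script is saved as app.py):
#   streamlit run app.py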