File size: 1,391 Bytes
ea90e06
e683309
ea90e06
 
a09216c
28b7f46
407249a
a09216c
5d2b0fe
538d7ca
5d2b0fe
ea90e06
d8f9f62
ea90e06
 
4bb560b
 
 
ccc474b
4bb560b
407249a
 
ea90e06
 
67ec166
dee9089
e7bd68e
67ec166
0af80fa
67ec166
0af80fa
30ba48b
ea90e06
5d2b0fe
e7bd68e
a09216c
dee9089
c871b03
e7bd68e
ccc474b
7078b67
cd7dcf3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import string
import unicodedata

import numpy as np
import pandas as pd
import streamlit as st
import transformers
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForMaskedLM


# The tokenizer and the masked-LM weights are loaded from two different
# checkpoints: the tokenizer from "moussaKam/AraBART", the model from
# "Hamda/test-1-finetuned-AraBART".
_tokenizer_options = dict(
    max_length=128,
    padding=True,
    pad_to_max_length=True,
    truncation=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    "moussaKam/AraBART",
    **_tokenizer_options,
)
model = AutoModelForMaskedLM.from_pretrained(
    "Hamda/test-1-finetuned-AraBART",
)

#@st.cache
def next_word(text, pipe):
    """Collect the non-punctuation predictions that *pipe* returns for *text*.

    Args:
        text: The input passed unchanged to ``pipe``.
        pipe: A callable that, given ``text``, returns an iterable of
            mappings each holding a ``'token_str'`` (the predicted token)
            and a ``'score'`` entry.

    Returns:
        A dict with two parallel lists, ``'Word'`` and ``'Score'``, holding
        the token and score of every prediction whose token contains no
        punctuation character. Predictions keep the order ``pipe`` produced.
    """
    words = []
    scores = []
    for prediction in pipe(text):
        token = prediction['token_str']
        # Drop any candidate containing punctuation; only word-like tokens
        # are useful as next-word suggestions.
        if any(_is_punctuation(char) for char in token):
            continue
        words.append(token)
        scores.append(prediction['score'])
    return {'Word': words, 'Score': scores}


# Built once instead of once per inspected character.
_ASCII_PUNCTUATION = frozenset(string.punctuation)


def _is_punctuation(char):
    """Return True if *char* is ASCII punctuation or Unicode punctuation."""
    # string.punctuation is ASCII-only, so Arabic marks such as '،', '؛' and
    # '؟' are only caught by the Unicode general category check (P*).
    return (
        char in _ASCII_PUNCTUATION
        or unicodedata.category(char).startswith('P')
    )

# --- Streamlit page: read a sentence, predict the next word, show results ---
st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات والتعبيرات الاصطلاحية وتصحيحها")
default_value = "بيعت الأسلحة في السوق"
# sent is the variable holding the user's input
# NOTE(review): newer Streamlit releases reject text_area heights below
# 68 px, so the smallest accepted height is used here instead of 20 --
# confirm against the deployed Streamlit version.
sent = st.text_area("مدخل", default_value, height=68)

# Keep the checkbox state: previously the return value was discarded, so
# ticking the box had no effect on the page.
use_chart = st.checkbox('استعمال الرسم البياني', value=False)

# Append a mask token after the user's text so the fill-mask pipeline
# predicts what comes next.
text_st = sent + ' <mask>'

pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=10)
dict_next_words = next_word(text_st, pipe)
df = pd.DataFrame.from_dict(dict_next_words)
df.reset_index(drop=True, inplace=True)

st.dataframe(df)

# Optional chart of the same predictions; skipped when every candidate was
# filtered out and there is nothing to plot.
if use_chart and not df.empty:
    st.bar_chart(df.set_index('Word')['Score'])