File size: 1,417 Bytes
ea90e06
e683309
ea90e06
 
a09216c
28b7f46
407249a
a09216c
7a14486
538d7ca
7a14486
ea90e06
fa1c92c
ea90e06
 
4bb560b
 
 
ccc474b
4bb560b
407249a
 
ea90e06
 
67ec166
dee9089
e7bd68e
67ec166
0af80fa
67ec166
0af80fa
30ba48b
ea90e06
7a14486
e7bd68e
a09216c
dee9089
c871b03
e7bd68e
ccc474b
7078b67
cd7dcf3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import string
import unicodedata

import numpy as np
import pandas as pd
import streamlit as st
import torch
import transformers
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForMaskedLM

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# app still starts on CPU-only hosts instead of failing in `.to(device)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# NOTE(review): padding/truncation options are usually supplied when the
# tokenizer is called rather than when it is loaded -- confirm these keyword
# arguments have the intended effect here.
tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", max_length=128, padding=True, pad_to_max_length = True, truncation=True)

# Fine-tuned masked-LM checkpoint, moved to the device selected above.
# NOTE(review): this runs at module level, so it executes every time the
# script is run -- consider caching the loaded model if start-up cost matters.
model = AutoModelForMaskedLM.from_pretrained("Hamda/test-1-finetuned-AraBART").to(device)

#@st.cache
def next_word(text, pipe):
    """Collect the punctuation-free predictions that *pipe* returns for *text*.

    Args:
        text: Input string passed unchanged to ``pipe``.
        pipe: Callable returning an iterable of mappings, each with a
            ``'token_str'`` (predicted token text) and a ``'score'`` entry.

    Returns:
        A dict with two parallel lists, ``'Word'`` and ``'Score'``, holding
        the kept predictions in the order ``pipe`` produced them.
    """
    # Built once per call instead of once per character.
    ascii_punctuation = frozenset(string.punctuation)

    def _is_punctuation(char):
        # string.punctuation is ASCII-only, so Arabic marks such as the
        # Arabic comma or question mark would slip through; every Unicode
        # "P*" category character is treated as punctuation as well.
        return (
            char in ascii_punctuation
            or unicodedata.category(char).startswith('P')
        )

    res_dict = {
        'Word': [],
        'Score': [],
    }
    for prediction in pipe(text):
        token = prediction['token_str']
        # Drop the whole token as soon as one character is punctuation.
        if any(_is_punctuation(char) for char in token):
            continue
        res_dict['Word'].append(token)
        res_dict['Score'].append(prediction['score'])
    return res_dict

# Page title shown at the top of the app.
st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات والتعبيرات الاصطلاحية وتصحيحها")
default_value = "بيعت الأسلحة في السوق"
# sent is the variable holding the user's input
# NOTE(review): some Streamlit releases enforce a minimum text_area height --
# confirm height=20 is accepted by the deployed version.
sent = st.text_area("مدخل", default_value, height=20)

# Keep the checkbox state: previously the return value was discarded, so the
# toggle had no effect. It now controls whether a chart is drawn below.
show_chart = st.checkbox('استعمال الرسم البياني', value=False)

# Append a mask token so the model predicts the word that follows the input.
text_st = sent + ' <mask>'

# Ask for the 15 highest-scoring fill-ins for the masked position.
pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=15)
dict_next_words = next_word(text_st, pipe)
df = pd.DataFrame.from_dict(dict_next_words)
df.reset_index(drop=True, inplace=True)

# The table is always shown, exactly as before.
st.dataframe(df)

# Optional chart of score per predicted word; skipped when nothing survived
# the punctuation filter.
if show_chart and not df.empty:
    st.bar_chart(df.set_index('Word')['Score'])


#st.table(df)