import streamlit as st
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForMaskedLM
import pandas as pd
import string
from time import time
from PIL import Image
# App logo and title.
image = Image.open('./Logo_APP.png')
n_image = image.resize((150, 150))
st.image(n_image)
# Title: "Language assistant for predicting and correcting collocations".
st.title("المساعدة اللغوية في التنبؤ بالمتلازمات والمتصاحبات وتصحيحها")
default_value = "أستاذ التعليم"
# sent is the variable holding the user's input (text area labelled "Input").
sent = st.text_area('المدخل', default_value)
# Tokenizer comes from the base AraBART checkpoint; the masked-LM weights are the fine-tuned model.
tokenizer = AutoTokenizer.from_pretrained("moussaKam/AraBART", max_length=128, padding=True, truncation=True)
model = AutoModelForMaskedLM.from_pretrained("Hamda/test-1-finetuned-AraBART")
# Fill-mask pipeline returning the 10 most probable candidates for the <mask> position.
pipe = pipeline("fill-mask", tokenizer=tokenizer, model=model, top_k=10)
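# The fill-mask pipeline returns one dict per candidate with the standard
# transformers keys ('score', 'token', 'token_str', 'sequence'); the values
# below are illustrative only, not actual model output:
#   pipe("أستاذ التعليم <mask>") -> [
#       {'score': 0.21, 'token': 1234, 'token_str': 'العالي', 'sequence': 'أستاذ التعليم العالي'},
#       ...
#   ]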
def next_word(text, pipe):
    """Collect the pipeline's mask-fill candidates, skipping punctuation-only tokens."""
    res_dict = {
        'الكلمة المقترحة': [],   # suggested word
        'العلامة': [],           # score
    }
    for e in pipe(text):
        # Keep the candidate only if none of its characters is a punctuation mark.
        if all(c not in string.punctuation for c in e['token_str']):
            res_dict['الكلمة المقترحة'].append(e['token_str'])
            res_dict['العلامة'].append(e['score'])
    return res_dict
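# Hypothetical usage (actual suggestions depend on the fine-tuned model):
#   next_word("أستاذ التعليم <mask>", pipe)
#   -> {'الكلمة المقترحة': ['العالي', ...], 'العلامة': [0.21, ...]}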
# "بحث" = Search: append a <mask> token and show the language-model suggestions.
if st.button('بحث', disabled=False):
    text_st = sent + ' <mask>'
    dict_next_words = next_word(text_st, pipe)
    df = pd.DataFrame.from_dict(dict_next_words)
    st.dataframe(df)
# Optional: suggestions from the probabilistic knowledge graph.
if st.checkbox('الاستعانة بالرسم البياني المعرفي الاحتمالي', value=False):
    a = time()  # start timing the graph-based computation
    VocMap = './voc.csv'
    ScoreMap = './BM25.csv'
    #@st.cache
    def reading_df(path1, path2):
        """Load the vocabulary and the BM25 association graph (tab-separated files)."""
        df_voc = pd.read_csv(path1, delimiter='\t')
        df_graph = pd.read_csv(path2, delimiter='\t')
        df_graph.set_index(['ID1', 'ID2'], inplace=True)
        df_gr = pd.read_csv(path2, delimiter='\t')
        df_gr.set_index(['ID1'], inplace=True)
        return df_voc, df_graph, df_gr
    df3, df_g, df_in = reading_df(VocMap, ScoreMap)
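    # Assumed file layout (inferred from the code, not verified):
    #   voc.csv  : one row per vocabulary entry with a 'word' column; the row index is the word id.
    #   BM25.csv : one row per (ID1, ID2) pair of word ids with a 'score' column (BM25-style association).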
    def Query2id(voc, query):
        """Map each whitespace-separated token of the query to its vocabulary id."""
        res = []
        for word in query.split():
            try:
                res.append(voc.index[voc['word'] == word].values[0])
            except (IndexError, KeyError):
                st.write('Token not found')
                continue
        return res
    id_list = Query2id(df3, sent)
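    # Hypothetical example (the actual ids depend on voc.csv):
    #   Query2id(df3, "أستاذ التعليم") -> [17, 342]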
    def setQueriesVoc(df, id_list):
        """Collect the ids of every word linked in the graph to at least one query word."""
        res = []
        for e in id_list:
            try:
                res.extend(list(df.loc[e]['ID2'].values))
            except (KeyError, AttributeError):
                st.write('Token not found')
                continue
        return list(set(res))
    L = setQueriesVoc(df_in, id_list)
    @st.cache
    def compute_score(L_terms, id_l):
        """Score each candidate id by summing its BM25 weights over all query ids."""
        tmt = {}
        for nc in L_terms:
            score = 0.0
            for ni in id_l:
                try:
                    score = score + df_g.loc[(ni, nc), 'score']
                except KeyError:
                    continue
            key = df3.loc[nc].values[0]  # look up the surface form of the candidate id
            tmt[key] = score
        return tmt
    tmt = compute_score(L, id_list)
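    # In formula form, with B(q, c) the BM25 weight stored for the pair (ID1=q, ID2=c):
    #   score(c) = sum over query ids q of B(q, c), where missing pairs contribute 0.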
    # Rank candidates by score and keep the top 10, min-max normalised to [0, 1].
    t_li = list(tmt.values())
    tmexp = sorted(tmt.items(), key=lambda x: x[1], reverse=True)
    mn, mx = min(t_li), max(t_li)
    denom = (mx - mn) or 1.0  # guard against all scores being equal
    i = 0
    dict_res = {'الكلمة المقترحة': [],
                'العلامة': []}
    for key, value in tmexp:
        new_score = ((value - mn) / denom) - 0.0001
        # str(...)[:6] truncates the score for display only.
        dict_res['العلامة'].append(str(new_score)[:6])
        dict_res['الكلمة المقترحة'].append(key)
        i += 1
        if i == 10:
            break
    res_df = pd.DataFrame.from_dict(dict_res)
    res_df.index += 1
    b = time()
    exec_time = b - a
    # Language-model suggestions for the same input, shown next to the graph-based ones.
    text_st = sent + ' <mask>'
    dict_next_words = next_word(text_st, pipe)
    df = pd.DataFrame.from_dict(dict_next_words)
    df.index += 1
    str_time = str(exec_time)[:3]
    st.markdown("""---""")
    # Header: "Words suggested by the language model".
    st.header("الكلمات المقترحة باستعمال النموذج اللغوي")
    st.dataframe(df)
    st.markdown("""---""")
    # Header: "Words suggested by the knowledge graph".
    st.header("الكلمات المقترحة باستعمال الرسم البياني")
    st.dataframe(res_df)
    st.markdown("""---""")
    # "Time taken using the graph", in seconds.
    st.write(f'{str_time} s :الوقت المستغرق باستعمال الرسم البياني')