Bofandra committed on
Commit
7ab5cc9
1 Parent(s): 449ae19

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from sentence_transformers import SentenceTransformer
3
+ import pandas as pd
4
+ import pickle
5
+ from pathlib import Path
6
+
7
def make_clickable_both(val):
    """Turn a ``"<name>#<url>"`` string into an HTML anchor.

    The value is split on ``'#'`` and must yield exactly two parts,
    otherwise the tuple unpacking raises ``ValueError``. Both parts are
    echoed to stdout (each followed by an extra newline) before the
    anchor markup is returned.
    """
    label, link = val.split('#')
    for part in (label, link):
        print(part + "\n")
    return '<a href="{}">{}</a>'.format(link, label)
12
+
13
# Number of best-scoring passages looked up for each query.
_TOP_K = 20

# Longest file-name stem (in characters) derived from a query; keeps the
# UTF-8 encoded name below common 255-byte file-name limits.
_MAX_STEM_CHARS = 60

# Lazily filled cache so the model and data files are loaded once per
# process instead of on every request.
_RESOURCES = {}


def _load_resources():
    """Load the verse table, passage table, embeddings and model once."""
    if not _RESOURCES:
        quran = pd.read_csv('quran-simple-clean.txt', delimiter="|")

        # SECURITY NOTE: pickle.load can execute arbitrary code. These
        # files must be trusted artifacts shipped with the app, never
        # user-supplied data.
        with open('quran-splitted.sav', 'rb') as handle:
            quran_splitted = pickle.load(handle)

        # The embeddings are precomputed offline; per the original
        # commented-out code they came from
        # model.encode(documents, convert_to_tensor=True, normalize_embeddings=True)
        # over quran_splitted['text'] -- TODO confirm against the stored file.
        with open('encoded_quran_text_split_multilingual-e5-large-instructs.sav', 'rb') as handle:
            document_embeddings = pickle.load(handle)

        model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')

        # Publish everything in one step so a failed load never leaves a
        # partially filled cache behind.
        _RESOURCES.update(
            quran=quran,
            quran_splitted=quran_splitted,
            document_embeddings=document_embeddings,
            model=model,
        )
    return _RESOURCES


def _safe_csv_path(query):
    """Build a CSV path in the working directory from an untrusted query."""
    import re

    # Replace path separators, dots and other punctuation so the query
    # cannot escape the working directory or form an invalid file name.
    stem = re.sub(r'[^\w\- ]+', '_', query)[:_MAX_STEM_CHARS].strip(' _')
    return Path((stem or 'results') + '.csv')


def find(query):
    """Search the Quran passages for ``query`` and return the best matches.

    The query is wrapped in the instruction prompt, embedded with the
    sentence-transformer model, and scored against the precomputed
    passage embeddings (dot product scaled by 100). For the top-scoring
    passages, the rows of ``quran-simple-clean.txt`` with the same
    ``sura`` and ``aya`` are collected, their ``text`` is wrapped in a
    link to quran.com, and the table is also written to a CSV file.

    Args:
        query: Free-text search string entered by the user.

    Returns:
        A ``(results, filepath)`` tuple: the DataFrame of matched verses
        and the ``Path`` of the CSV file that was written.
    """
    def get_detailed_instruct(task_description: str, query: str) -> str:
        return f'Instruct: {task_description}\nQuery: {query}'

    # Each query must come with a one-sentence instruction that describes the task
    task = 'Given a web search query, retrieve relevant passages that answer the query'
    queries = [get_detailed_instruct(task, query)]
    print("cekpoin0\n")

    resources = _load_resources()
    quran = resources['quran']
    print("cekpoin1\n")

    query_embeddings = resources['model'].encode(
        queries, convert_to_tensor=True, normalize_embeddings=True
    )
    scores = (query_embeddings @ resources['document_embeddings'].T) * 100
    print("cekpoin2\n")

    # assign() returns a new frame, so the cached passage table is never
    # mutated by a request.
    ranked = resources['quran_splitted'].assign(similarity=scores.tolist()[0])
    sorted_quran = ranked.sort_values(by='similarity', ascending=False)
    print("cekpoin3\n")

    # head() copes with fewer than _TOP_K passages, where positional
    # indexing would raise IndexError.
    top = sorted_quran.head(_TOP_K)
    matches = [
        quran.loc[(quran['sura'] == sura) & (quran['aya'] == aya)]
        for sura, aya in zip(top['sura'], top['aya'])
    ]
    # One concat at the end instead of growing the frame inside a loop.
    results = pd.concat(matches) if matches else quran.iloc[0:0].copy()

    # NOTE(review): the 'en-tafisr-ibn-kathir' slug is kept exactly as in
    # the original; verify against quran.com before "correcting" it.
    url = 'https://quran.com/'+results['sura'].astype(str)+':'+results['aya'].astype(str)+'/tafsirs/en-tafisr-ibn-kathir'
    results['text'] = '<a href="'+url+'">'+results['text']+'</a>'

    filepath = _safe_csv_path(query)
    results.to_csv(filepath, index=False)
    return results, filepath
70
+
71
# Gradio front end: a single text box feeds `find`; its two return values
# are shown as a table (the `text` column is rendered as markdown so the
# generated links are clickable) and offered through a download button.
demo = gr.Interface(
    fn=find,
    inputs="textbox",
    outputs=[
        gr.Dataframe(
            headers=['sura', 'aya', 'text'],
            datatype=["str", "str", "markdown"],
            wrap=True,
        ),
        gr.DownloadButton(),
    ],
    # One single-element row per example query.
    examples=[
        [sample]
        for sample in (
            "law of inheritance in islam",
            "tunjukilah jalan yang lurus",
            "سليمان",
        )
    ],
    title="Quran Finder",
)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()