Omartificial-Intelligence-Space committed on
Commit
3715692
·
verified ·
1 Parent(s): c1017d3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from sentence_transformers import SentenceTransformer
3
+ from wikipediaapi import Wikipedia
4
+ import textwrap
5
+ import numpy as np
6
+ import openai
7
+
8
# Function to process the input and generate the output
def process_query(wiki_page, model_name, embed_dim, query, api_key):
    """Answer a query with RAG over an Arabic Wikipedia page.

    Fetches the page text, chunks it into paragraphs, embeds chunks and the
    query with the selected SentenceTransformer model, picks the 3 most
    similar chunks by dot product (embeddings are normalized, so this is
    cosine similarity), and asks GPT-4o to answer from that context.

    Parameters:
        wiki_page: Title of the Arabic Wikipedia page to retrieve from.
        model_name: Display name of the embedding model (key of model_mapping).
        embed_dim: Truncation dimension for Matryoshka-style embeddings.
        query: The user's question (in Arabic).
        api_key: OpenAI API key used for the chat completion call.

    Returns:
        The model's answer string, or a short message when the page is
        missing or empty.
    """
    model_mapping = {
        "Arabic-mpnet-base-all-nli-triplet": "Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet",
        "Arabic-all-nli-triplet-Matryoshka": "Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka",
        "Arabert-all-nli-triplet-Matryoshka": "Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka",
        "Arabic-labse-Matryoshka": "Omartificial-Intelligence-Space/Arabic-labse-Matryoshka",
        "Marbert-all-nli-triplet-Matryoshka": "Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka"
    }

    model_path = model_mapping[model_name]
    model = SentenceTransformer(model_path, trust_remote_code=True, truncate_dim=embed_dim)

    wiki = Wikipedia('RAGBot/0.0', 'ar')
    page = wiki.page(wiki_page)
    # Guard against a non-existent page instead of embedding an empty string.
    if not page.exists():
        return "Wikipedia page not found. Please check the page title."

    # Chunk by blank line; drop empty/whitespace-only chunks so they never
    # pollute the similarity ranking.
    paragraphs = [p for p in page.text.split('\n\n') if p.strip()]
    if not paragraphs:
        return "The Wikipedia page contains no text to retrieve from."

    docs_embed = model.encode(paragraphs, normalize_embeddings=True)
    query_embed = model.encode(query, normalize_embeddings=True)
    # Normalized embeddings -> dot product == cosine similarity.
    similarities = np.dot(docs_embed, query_embed.T)

    # Take up to 3 best chunks, highest similarity first; min() keeps this
    # correct for pages with fewer than 3 paragraphs.
    top_k = min(3, len(paragraphs))
    top_idx = np.argsort(similarities, axis=0)[-top_k:][::-1].tolist()
    most_similar_documents = [paragraphs[idx] for idx in top_idx]

    CONTEXT = ""
    for p in most_similar_documents:
        CONTEXT += textwrap.fill(p, width=100) + "\n\n"

    prompt = f"""
    use the following CONTEXT to answer the QUESTION at the end.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.

    CONTEXT: {CONTEXT}
    QUESTION: {query}
    """

    client = openai.OpenAI(api_key=api_key)
    # BUGFIX: the OpenAI v1 client exposes `chat.completions.create`,
    # not `chat_completions.create` (the original raised AttributeError).
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": prompt},
        ]
    )

    return response.choices[0].message.content
55
+
56
# --- Gradio UI: wire the query pipeline to a simple form interface ---

# Display names offered in the embedding-model dropdown.
EMBEDDING_MODEL_CHOICES = [
    "Arabic-mpnet-base-all-nli-triplet",
    "Arabic-all-nli-triplet-Matryoshka",
    "Arabert-all-nli-triplet-Matryoshka",
    "Arabic-labse-Matryoshka",
    "Marbert-all-nli-triplet-Matryoshka",
]

# Input order must match process_query's signature:
# (wiki_page, model_name, embed_dim, query, api_key).
gr.Interface(
    fn=process_query,
    inputs=[
        gr.Textbox(label="Wikipedia Page (in Arabic)"),
        gr.Dropdown(choices=EMBEDDING_MODEL_CHOICES, label="Choose Embedding Model"),
        gr.Dropdown(choices=[768, 512, 256, 128, 64], label="Embedding Dimension"),
        gr.Textbox(label="Query (in Arabic)"),
        gr.Textbox(label="OpenAI API Key", type="password"),
    ],
    outputs=gr.Textbox(label="Output"),
    title="Arabic Wiki RAG",
    description="Choose a Wikipedia page, embedding model, and dimension to answer a query in Arabic."
).launch()