Spaces:
Sleeping
Sleeping
johannoriel
committed on
Commit
•
2124a36
1
Parent(s):
999be0c
Update app.py
Browse files
app.py
CHANGED
@@ -1,63 +1,77 @@
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import InferenceClient
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
-
|
5 |
-
|
6 |
-
"""
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
):
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
for
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
)
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
),
|
58 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
)
|
60 |
|
61 |
-
|
62 |
if __name__ == "__main__":
|
63 |
-
|
|
|
1 |
import fitz  # PyMuPDF
import gradio as gr
from huggingface_hub import InferenceClient
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModel
|
8 |
|
9 |
+
# Models offered in the LLM dropdown of the UI.
def get_hf_models():
    """Return the list of Hugging Face LLM repo ids selectable in the app."""
    available_models = [
        "Qwen/Qwen2.5-3B-Instruct",
        "HuggingFaceH4/zephyr-7b-beta",
        "mistralai/Mistral-7B-Instruct-v0.1",
    ]
    return available_models
|
12 |
+
|
13 |
+
# Pull the plain text out of a PDF document.
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of *pdf_path* (via PyMuPDF)."""
    with fitz.open(pdf_path) as doc:
        page_texts = [page.get_text() for page in doc]
    return "".join(page_texts)
|
20 |
+
|
21 |
+
# Manual RAG: paste the whole document text into the prompt.
def manual_rag(query, context, client, max_new_tokens=512):
    """Answer *query* by prepending *context* verbatim to the prompt.

    Args:
        query: The user's question.
        context: Text injected as-is into the prompt.
        client: An object exposing ``text_generation`` (huggingface_hub
            ``InferenceClient`` in this app).
        max_new_tokens: Generation budget; default 512 keeps the original
            behavior (generalized from a hard-coded constant).

    Returns:
        The generated answer string.
    """
    prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    response = client.text_generation(prompt, max_new_tokens=max_new_tokens)
    return response
|
26 |
+
|
27 |
+
# Classic RAG: chunk the PDF, embed, retrieve top-3, then answer.
def classic_rag(query, pdf_path, client, embedder):
    """Answer *query* using the 3 most similar chunks of *pdf_path*.

    ``embedder`` is a sentence-embedding model name passed to
    HuggingFaceEmbeddings. Returns a ``(response, context)`` tuple.
    NOTE(review): depends on ``FAISS`` being importable at module level.
    """
    raw_text = extract_text_from_pdf(pdf_path)
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    pieces = splitter.split_text(raw_text)
    vector_store = FAISS.from_texts(pieces, HuggingFaceEmbeddings(model_name=embedder))
    hits = vector_store.similarity_search(query, k=3)
    retrieved_context = " ".join(hit.page_content for hit in hits)
    answer = manual_rag(query, retrieved_context, client)
    return answer, retrieved_context
|
38 |
+
|
39 |
+
# Baseline: ask the LLM directly, with no retrieved context.
def no_rag(query, client):
    """Generate an answer to *query* without any retrieval step."""
    return client.text_generation(query, max_new_tokens=512)
|
43 |
+
|
44 |
+
# Gradio callback: run all three answering strategies for one question.
def process_query(query, pdf_path, llm_choice, embedder_choice):
    """Compare no-RAG, manual-RAG and classic-RAG answers for *query*.

    Returns the three answers plus the full PDF text and the retrieved
    context, in the order expected by the interface's output widgets.
    """
    llm_client = InferenceClient(llm_choice)
    document_text = extract_text_from_pdf(pdf_path)
    plain_answer = no_rag(query, llm_client)
    manual_answer = manual_rag(query, document_text, llm_client)
    classic_answer, retrieved_context = classic_rag(query, pdf_path, llm_client, embedder_choice)
    return plain_answer, manual_answer, classic_answer, document_text, retrieved_context
|
52 |
+
|
53 |
+
# Build the Gradio UI: one question in, three answers (plus debug text) out.
_inputs = [
    gr.Textbox(label="Votre question"),
    gr.File(label="Chargez votre PDF"),
    gr.Dropdown(
        choices=get_hf_models(),
        label="Choisissez le LLM",
        value="Qwen/Qwen2.5-3B-Instruct",
    ),
    gr.Dropdown(
        choices=["sentence-transformers/all-MiniLM-L6-v2", "nomic-ai/nomic-embed-text-v1.5"],
        label="Choisissez l'Embedder",
        value="sentence-transformers/all-MiniLM-L6-v2",
    ),
]
_outputs = [
    gr.Textbox(label="Réponse sans RAG"),
    gr.Textbox(label="Réponse avec RAG manuel"),
    gr.Textbox(label="Réponse avec RAG classique"),
    gr.Textbox(label="Texte complet du PDF (pour RAG manuel)", lines=10),
    gr.Textbox(label="Contexte extrait (pour RAG classique)", lines=10),
]
iface = gr.Interface(
    fn=process_query,
    inputs=_inputs,
    outputs=_outputs,
    title="Tutoriel RAG - Comparaison des méthodes",
    description="Posez une question sur le contenu d'un PDF et comparez les réponses obtenues avec différentes méthodes de RAG.",
    theme="default",
)
|
74 |
|
75 |
+
# Launch the application (script entry point; no-op when imported as a module)
if __name__ == "__main__":
    iface.launch()
|