Spaces:
Sleeping
Sleeping
New kws layout
Browse files- Home.py +60 -6
- requirements.txt +3 -1
- src/chatbot.py +4 -9
Home.py
CHANGED
@@ -1,13 +1,10 @@
|
|
1 |
import gradio as gr
|
2 |
from src.chatbot import chatbot, keyword_search
|
3 |
|
4 |
-
# Adjust size of each block is not yet working
|
5 |
-
output = [gr.Dataframe(line_breaks=True)]
|
6 |
-
input = gr.Textbox()
|
7 |
|
8 |
with gr.Blocks() as App:
|
9 |
with gr.Tab("ChatBot"):
|
10 |
-
#
|
11 |
gr.ChatInterface(chatbot,
|
12 |
title="PoliticsToYou",
|
13 |
description= "This chatbot uses the infomation of speeches of the german parliament (since 2021) \
|
@@ -15,10 +12,67 @@ with gr.Blocks() as App:
|
|
15 |
examples=["Wie steht die CDU zur Cannabislegalisierung?","Was waren die wichtigsten Themen in der aktuellen Legislaturperiode?"], #change to meaningful examples
|
16 |
cache_examples=False, #true increases the loading time
|
17 |
)
|
|
|
18 |
with gr.Tab("KeyWordSearch"):
|
19 |
-
gr.Interface(fn=keyword_search, inputs=input, outputs=output, max_batch_size = 10)
|
20 |
-
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
if __name__ == "__main__":
|
23 |
App.launch(share=False) #true not supported on hf spaces
|
24 |
|
|
|
1 |
import gradio as gr
|
2 |
from src.chatbot import chatbot, keyword_search
|
3 |
|
|
|
|
|
|
|
4 |
|
5 |
with gr.Blocks() as App:
|
6 |
with gr.Tab("ChatBot"):
|
7 |
+
#Apply RAG using the chatbot function from the local file src/chatbot.py
|
8 |
gr.ChatInterface(chatbot,
|
9 |
title="PoliticsToYou",
|
10 |
description= "This chatbot uses the infomation of speeches of the german parliament (since 2021) \
|
|
|
12 |
examples=["Wie steht die CDU zur Cannabislegalisierung?","Was waren die wichtigsten Themen in der aktuellen Legislaturperiode?"], #change to meaningful examples
|
13 |
cache_examples=False, #true increases the loading time
|
14 |
)
|
15 |
+
|
16 |
with gr.Tab("KeyWordSearch"):
|
|
|
|
|
17 |
|
18 |
+
with gr.Blocks() as Block:
|
19 |
+
#Keyword Input
|
20 |
+
keyword_box = gr.Textbox(label='keyword')
|
21 |
+
|
22 |
+
#Additional Input (hidden)
|
23 |
+
with gr.Accordion('Detailed filters', open=False):
|
24 |
+
#Row orientation
|
25 |
+
with gr.Row() as additional_input:
|
26 |
+
n_slider = gr.Slider(label="Number of Results", minimum=1, maximum=100, step=1, value=10)
|
27 |
+
party_dopdown = gr.Dropdown(choices=['CDU/CSU','SPD','FDP','Grüne','not found','DIE LINKE.','PDS','KPD'], label='Party')
|
28 |
+
|
29 |
+
search_btn = gr.Button('Search')
|
30 |
+
|
31 |
+
with gr.Column(visible=False) as output_col:
|
32 |
+
results_df = gr.Dataframe(label='Results', interactive=False)
|
33 |
+
|
34 |
+
#Download results from keyword search
|
35 |
+
with gr.Accordion('Would you like to download your results?', open=False) as download_row:
|
36 |
+
with gr.Row():
|
37 |
+
ftype_dropdown = gr.Dropdown(choices=["csv","excel","json"], label="Format")
|
38 |
+
export_btn = gr.Button('Export')
|
39 |
+
file = gr.File(file_types=[".xlsx", ".csv", ".json"], visible=False)
|
40 |
+
|
41 |
+
#Keyword Search on click
|
42 |
+
def search(keyword, n, party):  # ToDo: Include party
    """Run the keyword search and reveal the results column.

    Args:
        keyword: free-text search term from the keyword textbox.
        n: maximum number of results (from the slider).
        party: selected party filter; currently unused (see ToDo).

    Returns:
        Component updates: make the output column visible and fill the
        results dataframe with the search hits.
    """
    hits = keyword_search(query=keyword, n=n)
    updates = {output_col: gr.Column(visible=True)}
    updates[results_df] = hits
    return updates
|
47 |
+
|
48 |
+
search_btn.click(
|
49 |
+
fn=search,
|
50 |
+
inputs=[keyword_box, n_slider, party_dopdown],
|
51 |
+
outputs=[output_col, results_df],
|
52 |
+
)
|
53 |
+
|
54 |
+
#Export data to a downloadable format
|
55 |
+
def export(df, keyword, ftype=None):
    """Write the search results to a downloadable file.

    Args:
        df: pandas DataFrame holding the keyword-search results.
        keyword: search term; used as the base name of the exported file.
        ftype: "csv", "json", or anything else (falls back to Excel/.xlsx).

    Returns:
        A visible gr.File component pointing at the written file.
    """
    # Use a local name distinct from the `file = gr.File(...)` component
    # defined in the surrounding layout, which this local was shadowing.
    if ftype == "csv":
        path = f'{keyword}.csv'
        df.to_csv(path, index=False)
    elif ftype == "json":
        path = f'{keyword}.json'
        # NOTE(review): csv is exported without the index but json/excel
        # keep it — looks unintentional; confirm before unifying.
        df.to_json(path, index=True)
    else:
        path = f'{keyword}.xlsx'
        df.to_excel(path, index=True)
    # Single exit point instead of three identical returns per branch.
    return gr.File(value=path, visible=True)
|
68 |
+
|
69 |
+
export_btn.click(
|
70 |
+
fn=export,
|
71 |
+
inputs=[results_df, keyword_box, ftype_dropdown],
|
72 |
+
outputs=[file],
|
73 |
+
)
|
74 |
+
|
75 |
+
|
76 |
if __name__ == "__main__":
|
77 |
App.launch(share=False) #true not supported on hf spaces
|
78 |
|
requirements.txt
CHANGED
@@ -2,6 +2,8 @@ pandas==2.1.3
|
|
2 |
langchain==0.1.15
|
3 |
transformers==4.35.2
|
4 |
gradio==4.26.0
|
|
|
5 |
sentence-transformers==2.6.1
|
6 |
python-dotenv
|
7 |
-
faiss-cpu
|
|
|
|
2 |
langchain==0.1.15
|
3 |
transformers==4.35.2
|
4 |
gradio==4.26.0
|
5 |
+
gradio-calendar
|
6 |
sentence-transformers==2.6.1
|
7 |
python-dotenv
|
8 |
+
faiss-cpu
|
9 |
+
openpyxl
|
src/chatbot.py
CHANGED
@@ -12,7 +12,6 @@ import os
|
|
12 |
#load_dotenv(find_dotenv())
|
13 |
|
14 |
|
15 |
-
|
16 |
embeddings = HuggingFaceEmbeddings(model_name="paraphrase-multilingual-MiniLM-L12-v2")
|
17 |
llm = HuggingFaceHub(
|
18 |
# Try different model here
|
@@ -64,26 +63,22 @@ def chatbot(message, history, db=db, llm=llm, prompt=prompt2):
|
|
64 |
return response
|
65 |
|
66 |
# Retrieve speech contents based on keywords
|
67 |
-
def keyword_search(query, db=db, embeddings=embeddings):
|
68 |
query_embedding = embeddings.embed_query(query)
|
69 |
-
results = db.max_marginal_relevance_search_with_score_by_vector(query_embedding)
|
70 |
# Format vector store query results into dataframe
|
71 |
#print(results[0][0].metadata.keys())
|
72 |
|
73 |
df_res = pd.DataFrame(columns=['Speech Content','Date', 'Party', 'Relevance']) # Add Date/Party/Politician
|
74 |
-
i = 0
|
75 |
for doc in results:
|
76 |
speech_content = doc[0].page_content
|
77 |
speech_date = doc[0].metadata["date"]
|
78 |
party = doc[0].metadata["party"]
|
79 |
-
score = doc[1] # Relevance based on relevance search
|
80 |
df_res = pd.concat([df_res, pd.DataFrame({'Speech Content': [speech_content],
|
81 |
'Date': [speech_date],
|
82 |
'Party': [party],
|
83 |
'Relevance': [score]})], ignore_index=True)
|
84 |
-
|
85 |
-
if i > 2:
|
86 |
-
break
|
87 |
-
|
88 |
df_res.sort_values('Relevance', inplace=True, ascending=False)
|
89 |
return df_res
|
|
|
12 |
#load_dotenv(find_dotenv())
|
13 |
|
14 |
|
|
|
15 |
embeddings = HuggingFaceEmbeddings(model_name="paraphrase-multilingual-MiniLM-L12-v2")
|
16 |
llm = HuggingFaceHub(
|
17 |
# Try different model here
|
|
|
63 |
return response
|
64 |
|
65 |
# Retrieve speech contents based on keywords
|
66 |
+
def keyword_search(query, n=10, db=db, embeddings=embeddings):
    """Retrieve speech contents from the vector store matching *query*.

    Args:
        query: free-text keyword(s) to search for.
        n: maximum number of results, forwarded as ``k`` to the MMR search.
        db: vector store exposing
            max_marginal_relevance_search_with_score_by_vector.
        embeddings: embedding model used to embed the query.

    Returns:
        DataFrame with columns 'Speech Content', 'Date', 'Party',
        'Relevance', sorted by relevance score in descending order.
    """
    query_embedding = embeddings.embed_query(query)
    results = db.max_marginal_relevance_search_with_score_by_vector(query_embedding, k=n)

    # Collect rows first and build the frame once: pd.concat inside the
    # loop re-copies the whole frame every iteration (quadratic in n).
    rows = [
        {
            'Speech Content': doc.page_content,
            'Date': doc.metadata["date"],
            'Party': doc.metadata["party"],
            'Relevance': round(score, ndigits=2),  # score from the relevance search
        }
        for doc, score in results
    ]
    df_res = pd.DataFrame(rows, columns=['Speech Content', 'Date', 'Party', 'Relevance'])  # Add Date/Party/Politician
    df_res.sort_values('Relevance', inplace=True, ascending=False)
    return df_res
|