TomData committed on
Commit
33014c1
·
1 Parent(s): b714046

New kws layout

Files changed (3)
  1. Home.py +60 -6
  2. requirements.txt +3 -1
  3. src/chatbot.py +4 -9
Home.py CHANGED
@@ -1,13 +1,10 @@
 import gradio as gr
 from src.chatbot import chatbot, keyword_search
 
-# Adjust size of each block is not yet working
-output = [gr.Dataframe(line_breaks=True)]
-input = gr.Textbox()
 
 with gr.Blocks() as App:
     with gr.Tab("ChatBot"):
-        # Apply RAG using chatbut function from local file ChatBot.py
+        # Apply RAG using the chatbot function from the local file ChatBot.py
         gr.ChatInterface(chatbot,
                          title="PoliticsToYou",
                          description="This chatbot uses the information from speeches of the German parliament (since 2021) \
@@ -15,10 +12,67 @@ with gr.Blocks() as App:
                          examples=["Wie steht die CDU zur Cannabislegalisierung?", "Was waren die wichtigsten Themen in der aktuellen Legislaturperiode?"], # change to meaningful examples
                          cache_examples=False, # True increases the loading time
                          )
+
     with gr.Tab("KeyWordSearch"):
-        gr.Interface(fn=keyword_search, inputs=input, outputs=output, max_batch_size=10)
-
 
+        with gr.Blocks() as Block:
+            # Keyword input
+            keyword_box = gr.Textbox(label='keyword')
+
+            # Additional input (hidden by default)
+            with gr.Accordion('Detailed filters', open=False):
+                # Row orientation
+                with gr.Row() as additional_input:
+                    n_slider = gr.Slider(label="Number of Results", minimum=1, maximum=100, step=1, value=10)
+                    party_dopdown = gr.Dropdown(choices=['CDU/CSU', 'SPD', 'FDP', 'Grüne', 'not found', 'DIE LINKE.', 'PDS', 'KPD'], label='Party')
+
+            search_btn = gr.Button('Search')
+
+            with gr.Column(visible=False) as output_col:
+                results_df = gr.Dataframe(label='Results', interactive=False)
+
+            # Download results from the keyword search
+            with gr.Accordion('Would you like to download your results?', open=False) as download_row:
+                with gr.Row():
+                    ftype_dropdown = gr.Dropdown(choices=["csv", "excel", "json"], label="Format")
+                    export_btn = gr.Button('Export')
+                file = gr.File(file_types=[".xlsx", ".csv", ".json"], visible=False)
+
+            # Keyword search on click
+            def search(keyword, n, party): # ToDo: include the party filter
+                return {
+                    output_col: gr.Column(visible=True),
+                    results_df: keyword_search(query=keyword, n=n),
+                }
+
+            search_btn.click(
+                fn=search,
+                inputs=[keyword_box, n_slider, party_dopdown],
+                outputs=[output_col, results_df],
+            )
+
+            # Export the results to a downloadable file
+            def export(df, keyword, ftype=None):
+                if ftype == "csv":
+                    file = f'{keyword}.csv'
+                    df.to_csv(file, index=False)
+                    return gr.File(value=file, visible=True)
+                elif ftype == "json":
+                    file = f'{keyword}.json'
+                    df.to_json(file, index=True)
+                    return gr.File(value=file, visible=True)
+                else:
+                    file = f'{keyword}.xlsx'
+                    df.to_excel(file, index=True)
+                    return gr.File(value=file, visible=True)
+
+            export_btn.click(
+                fn=export,
+                inputs=[results_df, keyword_box, ftype_dropdown],
+                outputs=[file],
+            )
+
+
 if __name__ == "__main__":
     App.launch(share=False) # share=True is not supported on HF Spaces

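The new KeyWordSearch tab relies on Gradio's pattern of returning a dict keyed by output components from an event handler, so a single click can both reveal the hidden results column and fill the dataframe. A minimal, self-contained sketch of that pattern, with illustrative component and function names and dummy data standing in for keyword_search:

import gradio as gr
import pandas as pd

def run_search(query):
    # Stand-in for keyword_search(); any DataFrame works for the demo.
    df = pd.DataFrame({"Speech Content": [f"... {query} ..."], "Relevance": [0.9]})
    # A dict keyed by components lets one handler update several outputs:
    # the hidden column becomes visible and the dataframe receives the rows.
    return {out_col: gr.Column(visible=True), out_df: df}

with gr.Blocks() as demo:
    query_box = gr.Textbox(label="keyword")
    go_btn = gr.Button("Search")
    with gr.Column(visible=False) as out_col:
        out_df = gr.Dataframe(label="Results")
    go_btn.click(fn=run_search, inputs=[query_box], outputs=[out_col, out_df])

if __name__ == "__main__":
    demo.launch()
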
requirements.txt CHANGED
@@ -2,6 +2,8 @@ pandas==2.1.3
 langchain==0.1.15
 transformers==4.35.2
 gradio==4.26.0
+gradio-calendar
 sentence-transformers==2.6.1
 python-dotenv
-faiss-cpu
+faiss-cpu
+openpyxl
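openpyxl is presumably pulled in because the new Excel export path calls DataFrame.to_excel, which needs an .xlsx writer engine; gradio-calendar is added here but not referenced anywhere else in this diff. A quick sketch of the dependency at work (file name and data are made up for illustration):

import pandas as pd

# pandas delegates .xlsx writing to an engine such as openpyxl.
df = pd.DataFrame({"Speech Content": ["..."], "Relevance": [0.87]})
df.to_excel("results.xlsx", index=True, engine="openpyxl")
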
src/chatbot.py CHANGED
@@ -12,7 +12,6 @@ import os
 #load_dotenv(find_dotenv())
 
 
-
 embeddings = HuggingFaceEmbeddings(model_name="paraphrase-multilingual-MiniLM-L12-v2")
 llm = HuggingFaceHub(
     # Try different model here
@@ -64,26 +63,22 @@ def chatbot(message, history, db=db, llm=llm, prompt=prompt2):
     return response
 
 # Retrieve speech contents based on keywords
-def keyword_search(query, db=db, embeddings=embeddings):
+def keyword_search(query, n=10, db=db, embeddings=embeddings):
     query_embedding = embeddings.embed_query(query)
-    results = db.max_marginal_relevance_search_with_score_by_vector(query_embedding)
+    results = db.max_marginal_relevance_search_with_score_by_vector(query_embedding, k=n)
     # Format vector store query results into dataframe
     #print(results[0][0].metadata.keys())
 
     df_res = pd.DataFrame(columns=['Speech Content', 'Date', 'Party', 'Relevance']) # Add Date/Party/Politician
-    i = 0
     for doc in results:
         speech_content = doc[0].page_content
         speech_date = doc[0].metadata["date"]
         party = doc[0].metadata["party"]
-        score = doc[1] # Relevance based on relevance search
+        score = round(doc[1], ndigits=2) # Relevance based on relevance search
         df_res = pd.concat([df_res, pd.DataFrame({'Speech Content': [speech_content],
                                                   'Date': [speech_date],
                                                   'Party': [party],
                                                   'Relevance': [score]})], ignore_index=True)
-        i = i + 1
-        if i > 2:
-            break
-
+
     df_res.sort_values('Relevance', inplace=True, ascending=False)
     return df_res
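
With the new signature, the slider value from the UI flows through as the number of MMR hits (k=n), replacing the old hard-coded stop after three results. A hedged usage sketch, assuming the FAISS index, embeddings, and Hugging Face credentials that src/chatbot.py sets up at import time are available in your environment:

from src.chatbot import keyword_search

# n caps the number of results returned by the MMR search;
# the DataFrame comes back sorted by the 'Relevance' column.
results = keyword_search("Cannabislegalisierung", n=5)
print(results[["Date", "Party", "Relevance"]])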