UI updates

app.py CHANGED
@@ -42,9 +42,9 @@ scheduler = CommitScheduler(
 # We need to create the local vectorstore collection once using load_chunks
 # vectorestore colection are stored on persistent storage so this needs to be run only once
 # hence, comment out line below when creating for first time
-vectorstores = load_chunks()
+#vectorstores = load_chunks()
 # once the vectore embeddings are created we will use qdrant client to access these
-
+vectorstores = get_local_qdrant()
 
 #####---------------------CHAT-----------------------------------------------------
 def start_chat(query,history):
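The change above stops re-embedding on startup (load_chunks) and instead opens collections already persisted to disk. As a hypothetical sketch only, not the app's actual helper, a get_local_qdrant()-style loader amounts to something like this, assuming qdrant-client's file-backed local mode; the path and return shape are made up:

# Hypothetical sketch, not the app's real helper: open a file-backed Qdrant
# store that an earlier load_chunks() run persisted, instead of re-embedding.
from qdrant_client import QdrantClient

def get_local_qdrant_sketch(path="/data/qdrant"):  # made-up path
    client = QdrantClient(path=path)  # local on-disk mode, no server needed
    names = [c.name for c in client.get_collections().collections]
    # app.py wraps these in vectorstores keyed by collection name, e.g. "docling"
    return {name: client for name in names}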
@@ -54,12 +54,75 @@ def start_chat(query,history):
 
 def finish_chat():
     return (gr.update(interactive = True,value = ""))
 
-async def chat(query,history,sources,reports,subtype,year):
+def submit_feedback(feedback, logs_data):
+    """Handle feedback submission"""
+    try:
+        if logs_data is None:
+            return gr.update(visible=False), gr.update(visible=True)
+
+        session_id = logs_data.get("session_id")
+        if session_id:
+            # Update session last_activity to now
+            session_manager.update_session(session_id)
+            # Compute duration from the session manager and update the log.
+            logs_data["session_duration_seconds"] = session_manager.get_session_duration(session_id)
+
+        # Now save the (feedback) log record
+        save_logs(scheduler, JSON_DATASET_PATH, logs_data, feedback)
+        return gr.update(visible=False), gr.update(visible=True)
+    except Exception as e:
+        return gr.update(visible=False), gr.update(visible=True)
+
+
+# Session Manager added (track session duration, location, and platform)
+class SessionManager:
+    def __init__(self):
+        self.sessions = {}
+
+    def create_session(self, client_ip, user_agent):
+        session_id = str(uuid4())
+        self.sessions[session_id] = {
+            'start_time': datetime.now(),
+            'last_activity': datetime.now(),
+            'client_ip': client_ip,
+            'location_info': get_client_location(client_ip),
+            'platform_info': get_platform_info(user_agent)
+        }
+        return session_id
+
+    def update_session(self, session_id):
+        if session_id in self.sessions:
+            self.sessions[session_id]['last_activity'] = datetime.now()
+
+    def get_session_duration(self, session_id):
+        if session_id in self.sessions:
+            start = self.sessions[session_id]['start_time']
+            last = self.sessions[session_id]['last_activity']
+            return (last - start).total_seconds()
+        return 0
+
+    def get_session_data(self, session_id):
+        return self.sessions.get(session_id)
+
+# Initialize session manager
+session_manager = SessionManager()
+
+async def chat(query,history,sources,reports,subtype,year, client_ip=None, session_id = None, request:gr.Request = None):
     """taking a query and a message history, use a pipeline (reformulation, retriever, answering)
     to yield a tuple of:(messages in gradio format/messages in langchain format, source documents)
     """
 
+    if not session_id:
+        user_agent = request.headers.get('User-Agent','') if request else ''
+        session_id = session_manager.create_session(clinet_ip, user_agent)
+    else:
+        session_manager.update_session(session_id)
+
+    # Get session id
+    session_data = session_manager.get_session_data(session_id)
+    session_duration = session_manager.get_session_duration(session_id)
+
     print(f">> NEW QUESTION : {query}")
     print(f"history:{history}")
     print(f"sources:{sources}")
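To make the lifecycle concrete: a minimal, runnable sketch of the duration bookkeeping the new SessionManager performs (create on the first message, refresh last_activity on follow-ups, read the elapsed time when logging). The geolocation and user-agent helpers are omitted because they live elsewhere in app.py:

# Minimal sketch of the SessionManager duration bookkeeping shown above;
# location/platform lookups are omitted for brevity.
import time
from datetime import datetime
from uuid import uuid4

sessions = {}

def create_session():
    session_id = str(uuid4())
    now = datetime.now()
    sessions[session_id] = {"start_time": now, "last_activity": now}
    return session_id

def update_session(session_id):
    sessions[session_id]["last_activity"] = datetime.now()

def get_session_duration(session_id):
    s = sessions[session_id]
    return (s["last_activity"] - s["start_time"]).total_seconds()

sid = create_session()            # first message: no session_id yet
time.sleep(0.2)                   # ...user asks a follow-up later...
update_session(sid)               # follow-up: refresh last_activity
print(get_session_duration(sid))  # ~0.2, the value that ends up in the logs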
@@ -70,7 +133,7 @@ async def chat(query,history,sources,reports,subtype,year):
     output_query = ""
 
     ##------------------------fetch collection from vectorstore------------------------------
-    vectorstore = vectorstores["
+    vectorstore = vectorstores["docling"]
 
     ##------------------------------get context----------------------------------------------
     context_retrieved = get_context(vectorstore=vectorstore,query=query,reports=reports,
@@ -113,6 +176,25 @@ async def chat(query,history,sources,reports,subtype,year):
 
     ##-----------------------get answer from endpoints------------------------------
     answer_yet = ""
+
+    logs_data = {
+        "record_id": str(uuid4()), # Add unique record ID
+        "session_id": session_id,
+        "session_duration_seconds": session_duration,
+        "client_location": session_data['location_info'],
+        "platform": session_data['platform_info'],
+        # "system_prompt": SYSTEM_PROMPT, #REMOVED FOR TESTING
+        # "sources": sources, #REMOVED FOR TESTING
+        # "reports": reports, #REMOVED FOR TESTING
+        # "subtype": subtype, #REMOVED FOR TESTING
+        "year": year,
+        "question": query,
+        "retriever": model_config.get('retriever','MODEL'),
+        "endpoint_type": model_config.get('reader','TYPE'),
+        "reader": model_config.get('reader','NVIDIA_MODEL'),
+        # "docs": [doc.page_content for doc in context_retrieved], #REMOVED FOR TESTING
+    }
+
     if model_config.get('reader','TYPE') == 'NVIDIA':
         chat_model = nvidia_client()
         async def process_stream():
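Illustratively, each chat turn now produces one record shaped roughly like the sketch below; all values are made up, and "answer" is attached later while the response streams:

# Illustrative only: approximate shape of one logs_data record built above.
example_record = {
    "record_id": "0b1f2c3d-uuid4",             # str(uuid4())
    "session_id": "9e8d7c6b-uuid4",
    "session_duration_seconds": 42.7,
    "client_location": {"country": "Uganda"},  # from get_client_location()
    "platform": {"browser": "Firefox"},        # from get_platform_info()
    "year": ["2022", "2023"],
    "question": "What challenges were found in revenue collection?",
    "retriever": "retriever-model-name",       # model_config values
    "endpoint_type": "NVIDIA",
    "reader": "reader-model-name",
}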
@@ -138,42 +220,61 @@ async def chat(query,history,sources,reports,subtype,year):
         async for update in process_stream():
             yield update
 
+    #else:
+    #    chat_model = dedicated_endpoint()
+    #    async def process_stream():
+    #        # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
+    #        # instead of modifying the one from the outer scope.
+    #        nonlocal answer_yet # Use the outer scope's answer_yet variable
+    #        # Iterate over the streaming response chunks
+    #        async for chunk in chat_model.astream(messages):
+    #            token = chunk.content
+    #            answer_yet += token
+    #            parsed_answer = parse_output_llm_with_sources(answer_yet)
+    #            history[-1] = (query, parsed_answer)
+    #            yield [tuple(x) for x in history], docs_html
+
+    #    # Stream the response updates
+    #    async for update in process_stream():
+    #        yield update
+
     else:
-        chat_model = dedicated_endpoint()
+        chat_model = dedicated_endpoint() # TESTING: ADAPTED FOR HF INFERENCE API (needs to be reverted for production version)
         async def process_stream():
-            # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
-            # instead of modifying the one from the outer scope.
-            nonlocal answer_yet # Use the outer scope's answer_yet variable
-            # Iterate over the streaming response chunks
-            async for chunk in chat_model.astream(messages):
-                token = chunk.content
-                answer_yet += token
-                parsed_answer = parse_output_llm_with_sources(answer_yet)
-                history[-1] = (query, parsed_answer)
-                yield [tuple(x) for x in history], docs_html
+            nonlocal answer_yet
+            try:
+                formatted_messages = [
+                    {
+                        "role": msg.type if hasattr(msg, 'type') else msg.role,
+                        "content": msg.content
+                    }
+                    for msg in messages
+                ]
+
+                response = chat_model.chat_completion(
+                    messages=formatted_messages,
+                    max_tokens=int(model_config.get('reader', 'MAX_TOKENS'))
+                )
+
+                response_text = response.choices[0].message.content
+                words = response_text.split()
+                for word in words:
+                    answer_yet += word + " "
+                    parsed_answer = parse_output_llm_with_sources(answer_yet)
+                    history[-1] = (query, parsed_answer)
+                    # Update logs_data with current answer (and get a new timestamp)
+                    logs_data["answer"] = parsed_answer
+                    yield [tuple(x) for x in history], docs_html, logs_data, session_id
+                    await asyncio.sleep(0.05)
+
+            except Exception as e:
+                raise
 
-        # Stream the response updates
         async for update in process_stream():
             yield update
 
     # logging the event
     try:
-        timestamp = str(datetime.now().timestamp())
-        logs = {
-            "system_prompt": SYSTEM_PROMPT,
-            "sources":sources,
-            "reports":reports,
-            "subtype":subtype,
-            "year":year,
-            "question":query,
-            "sources":sources,
-            "retriever":model_config.get('retriever','MODEL'),
-            "endpoint_type":model_config.get('reader','TYPE'),
-            "raeder":model_config.get('reader','NVIDIA_MODEL'),
-            "docs":[doc.page_content for doc in context_retrieved],
-            "answer": history[-1][1],
-            "time": timestamp,
-        }
         save_logs(scheduler,JSON_DATASET_PATH,logs)
     except Exception as e:
         logging.error(e)
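The reworked else-branch trades true token streaming (astream) for a single blocking chat_completion call whose full text is then replayed word by word, so the UI still appears to stream. A minimal, runnable sketch of that replay pattern, with the model call faked:

# Sketch of the simulated-streaming pattern used above: fetch the full answer
# first, then re-emit it word by word with a short pause between yields.
import asyncio

async def fake_chat_completion():
    return "Gradio re-renders the chat bubble on every yield."

async def replay_stream():
    answer_yet = ""
    response_text = await fake_chat_completion()
    for word in response_text.split():
        answer_yet += word + " "
        yield answer_yet            # partial answer, as chat() yields history
        await asyncio.sleep(0.05)   # same pacing as the diff

async def main():
    async for partial in replay_stream():
        print(partial)

asyncio.run(main())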
@@ -314,7 +415,34 @@ with gr.Blocks(title="Audit Q&A", css= "style.css", theme=theme,elem_id = "main-
         return [gr.update(visible=visible_bools[i]) for i in range(len(samples))]
 
     dropdown_samples.change(change_sample_questions,dropdown_samples,samples)
-
+
+    # ---- New Guidelines Tab ----
+    with gr.Tab("Guidelines", elem_classes="max-height other-tabs"):
+        gr.Markdown("""
+        Welcome to Audit Q&A, your AI-powered assistant for exploring and understanding Uganda's audit reports. This tool leverages advanced language models to help you get clear and structured answers based on audit publications. To get you started, here a few tips on how to use the tool:
+        ### Crafting Effective Prompts
+        - **Be Clear and Specific**: Frame your questions clearly and focus on what you want to learn.
+        - **One Topic at a Time**: Break complex queries into simpler, focused questions.
+        - **Be Direct**: Instead of "What are the findings?", try "What were the main issues identified in procurement practices?" or "What challenges were found in revenue collection?"
+
+        ### Best Practices
+        - Start with a simple, focused question.
+        - Follow up with additional questions if your initial query doesn't yield the desired results.
+        - Experiment with different phrasings to get the most accurate answers.
+        - Use the source citations as a reference to validate the provided information.
+        ### Utilizing Filters
+        - **Report Category & Subtype**: Use the "Reports" tab to choose your preferred report category and refine your query by selecting a specific sub-type. This will help narrow down the context for your question.
+        - **Year Selection**: Choose one or more years from the "Year" filter to target your query to specific time periods.
+        - **Specific Reports**: Optionally, select specific reports using the dropdown to focus on a particular document or set of documents.
+        ### Useful Resources
+
+        - <ins>[**Short Course: Generative AI for Everyone** (3 hours)](https://www.deeplearning.ai/courses/generative-ai-for-everyone/)</ins>
+        - <ins>[**Short Course: Advanced Prompting** (1 hour)](https://www.deeplearning.ai/courses/ai-for-everyone/)</ins>
+        - <ins>[**Short Course: Introduction to AI with IBM** (13 hours)](https://www.coursera.org/learn/introduction-to-ai)</ins>
+        Enjoy using Audit Q&A and happy prompting!
+        """)
+
+
 
     # static tab 'about us'
     with gr.Tab("About",elem_classes = "max-height other-tabs"):
@@ -380,21 +508,64 @@ with gr.Blocks(title="Audit Q&A", css= "style.css", theme=theme,elem_id = "main-
 
 
 
-
-
+    def show_feedback(logs):
+        """Show feedback buttons and store logs in state"""
+        return gr.update(visible=True), gr.update(visible=False), logs
+
+    def submit_feedback_okay(logs_data):
+        """Handle 'okay' feedback submission"""
+        return submit_feedback("okay", logs_data)
+
+    def submit_feedback_not_okay(logs_data):
+        """Handle 'not okay' feedback submission"""
+        return submit_feedback("not_okay", logs_data)
+
+    okay_btn.click(
+        submit_feedback_okay,
+        [feedback_state],
+        [feedback_row, feedback_thanks]
+    )
+
+    not_okay_btn.click(
+        submit_feedback_not_okay,
+        [feedback_state],
+        [feedback_row, feedback_thanks]
+    )
+    #-------------------- Session Management + Geolocation -------------------------
+
+    # Add these state components at the top level of the Blocks
+    session_id = gr.State(None)
+    client_ip = gr.State(None)
+
+    @demo.load(api_name="get_client_ip")
+    def get_client_ip_handler(dummy_input="", request: gr.Request = None):
+        """Handler for getting client IP in Gradio context"""
+        return get_client_ip(request)
+    )
+
+    #-------------------- Gradio voodoo -------------------------
+
+    # Update the event handlers
     (textbox
-
-
-
-
+        .submit(get_client_ip_handler, [textbox], [client_ip], api_name="get_ip_textbox")
+        .then(start_chat, [textbox, chatbot], [textbox, tabs, chatbot], queue=False, api_name="start_chat_textbox")
+        .then(chat,
+              [textbox, chatbot, dropdown_sources, dropdown_reports, dropdown_category, dropdown_year, client_ip, session_id],
+              [chatbot, sources_textbox, feedback_state, session_id],
+              queue=True, concurrency_limit=8, api_name="chat_textbox")
+        .then(show_feedback, [feedback_state], [feedback_row, feedback_thanks, feedback_state], api_name="show_feedback_textbox")
+        .then(finish_chat, None, [textbox], api_name="finish_chat_textbox"))
 
     (examples_hidden
         .change(start_chat, [examples_hidden, chatbot], [textbox, tabs, chatbot], queue=False, api_name="start_chat_examples")
-
-        .then(chat,
-
-
-
+        .then(get_client_ip_handler, [examples_hidden], [client_ip], api_name="get_ip_examples")
+        .then(chat,
+              [examples_hidden, chatbot, dropdown_sources, dropdown_reports, dropdown_category, dropdown_year, client_ip, session_id],
+              [chatbot, sources_textbox, feedback_state, session_id],
+              concurrency_limit=8, api_name="chat_examples")
+        .then(show_feedback, [feedback_state], [feedback_row, feedback_thanks, feedback_state], api_name="show_feedback_examples")
+        .then(finish_chat, None, [textbox], api_name="finish_chat_examples"))
+
     demo.queue()
 
     demo.launch()
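The chains above thread two gr.State values (client_ip, session_id) through chat() and only then reveal the feedback row. A stripped-down, runnable sketch of the same submit/then pattern, assuming Gradio's tuple-style Chatbot; the names mirror the diff but everything else is simplified:

# Stripped-down sketch of the .submit().then() wiring used above:
# state flows through the chain, and a hidden row is revealed at the end.
import gradio as gr

def answer(msg, history):
    history = (history or []) + [(msg, f"echo: {msg}")]
    return history, {"question": msg}        # chatbot update + log payload

def show_feedback(logs):
    return gr.update(visible=True), logs     # reveal feedback row, keep logs

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    textbox = gr.Textbox()
    feedback_state = gr.State(None)
    with gr.Row(visible=False) as feedback_row:
        gr.Button("okay")
        gr.Button("not okay")

    (textbox
        .submit(answer, [textbox, chatbot], [chatbot, feedback_state])
        .then(show_feedback, [feedback_state], [feedback_row, feedback_state]))

demo.launch()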