Update app.py
app.py
CHANGED
@@ -224,23 +224,23 @@ async def chat(query,history,sources,reports,subtype, client_ip=None, session_id
         async for update in process_stream():
             yield update
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    elif model_config.get('reader','TYPE') == 'DEDICATED':
+        chat_model = dedicated_endpoint()
+        async def process_stream():
+            # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
+            # instead of modifying the one from the outer scope.
+            nonlocal answer_yet  # Use the outer scope's answer_yet variable
+            # Iterate over the streaming response chunks
+            async for chunk in chat_model.astream(messages):
+                token = chunk.content
+                answer_yet += token
+                parsed_answer = parse_output_llm_with_sources(answer_yet)
+                history[-1] = (query, parsed_answer)
+                yield [tuple(x) for x in history], docs_html
+
+        # Stream the response updates
+        async for update in process_stream():
+            yield update
 
     else:
         chat_model = serverless_api() # TESTING: ADAPTED FOR HF INFERENCE API (needs to be reverted for production version)
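Note on the nonlocal usage introduced in this commit: the inline comment is accurate, and the behavior is easy to reproduce in isolation. Below is a minimal, self-contained sketch of the same nested async-generator pattern; mock_stream and the Chunk class are hypothetical stand-ins for chat_model.astream(messages) and its response chunks, and the outer coroutine prints updates rather than yielding them as chat() does.

import asyncio
from dataclasses import dataclass

@dataclass
class Chunk:
    content: str

async def mock_stream():
    # Hypothetical stand-in for chat_model.astream(messages): yields chunks.
    for token in ["Hel", "lo", ", wor", "ld!"]:
        await asyncio.sleep(0)  # simulate streaming latency
        yield Chunk(content=token)

async def chat(query="query"):
    answer_yet = ""
    history = [(query, "")]

    async def process_stream():
        # Without nonlocal, the augmented assignment below would make
        # answer_yet a *local* of process_stream() and raise
        # UnboundLocalError on its first use.
        nonlocal answer_yet
        async for chunk in mock_stream():
            answer_yet += chunk.content
            history[-1] = (query, answer_yet)
            yield [tuple(x) for x in history]

    async for update in process_stream():
        print(update)  # each update carries the partially streamed answer

asyncio.run(chat())

Removing the nonlocal line makes the first answer_yet += chunk.content fail with UnboundLocalError, because the augmented assignment causes Python to treat answer_yet as a local variable of process_stream() rather than the one defined in chat().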