import os import chromadb import gradio as gr from dotenv import load_dotenv from openai import OpenAI from langchain_community.embeddings import AnyscaleEmbeddings from langchain_community.vectorstores import Chroma from pydantic import BaseModel from typing import Optional, List class Message(BaseModel): role: str content: str qna_system_message = """ You are an assistant to an insurance firm who answers customer queries based on their insurance policy documents. User input will have the context required by you to answer customer questions. This context will begin with the word: ###Context. The context contains references to specific portions of a document relevant to the customer query. Customer questions will begin with the word: ###Question. Information about the customer will begin with the word: ###Customer Information Please answer user questions ONLY using the context provided in the input and the customer information. DO NOT mention anything about the context in your final answer. Your response should only contain the answer to the question AND NOTHING ELSE. DO NOT answer any questions about customers whose details are different from those mentioned in ###Customer Information. If the answer is not found in the context or in the customer information, respond "Sorry, I cannot answer your query at this point, please contact our hotline: 1-800-INSURANCE". """ qna_user_message_template = """ ###Customer Information Customer Name: John Doe Policy Number: NBHTGBP22011V012223# Premium Amount: $15000 Number of premium installments: 5 Number of installments paid: 3 Last Premium Paid: Yes Last Premium Date: 2024-05-12 ###Context Here are some documents that are relevant to the question mentioned below. {context} ###Question {question} """ load_dotenv() anyscale_api_key = os.environ['ANYSCALE_API_KEY'] client = OpenAI( base_url="https://api.endpoints.anyscale.com/v1", api_key=anyscale_api_key ) qna_model = 'meta-llama/Meta-Llama-3-8B-Instruct' embedding_model = AnyscaleEmbeddings( client=client, model='thenlper/gte-large' ) chromadb_client = chromadb.PersistentClient(path='./policy_db') vectorstore_persisted = Chroma( client=chromadb_client, collection_name="policy-text", embedding_function=embedding_model ) retriever = vectorstore_persisted.as_retriever( search_type='similarity', search_kwargs={'k': 5} ) def make_completion(input:str, history: List[Message]) -> Optional[str]: relevant_document_chunks = retriever.invoke(input) context_list = [d.page_content for d in relevant_document_chunks] context_for_query = "\n".join(context_list) user_message = [{ 'role': 'user', 'content': qna_user_message_template.format( context=context_for_query, question=input ) }] prompt = [{'role':'system', 'content': qna_system_message}] + history + user_message try: response = client.chat.completions.create( model=qna_model, messages=prompt, temperature=0 ) prediction = response.choices[0].message.content.strip() except Exception as e: prediction = f'Sorry, I cannot answer your query at this point, please contact our hotline: 1-800-INSURANCE' return prediction def predict(input: str, history: List[Message]): """ Predict the response of the chatbot and complete a running list of chat history. """ response = make_completion(input, history) history.append({"role": "user", "content": input}) history.append({"role": "assistant", "content": response}) messages = [ (history[i]["content"], history[i+1]["content"]) for i in range(0, len(history)-1, 2) ] return messages, history with gr.Blocks() as demo: chatbot = gr.Chatbot(label="CHAT", layout="bubble", likeable=True, show_copy_button=True) state = gr.State([]) with gr.Row(): txt = gr.Textbox(show_label=True, placeholder="Enter your query and press enter") txt.submit(predict, [txt, state], [chatbot, state]) demo.launch(auth=("demouser", os.getenv('PASSWD')))