Spaces:

lingyit1108
/

ragtest-sakimilo

Running

App Files Community

lingyit1108 committed on Feb 19, 2024

Commit

69e20d0

1 Parent(s): 8434471

finishing up the app: UI, content understanding, questionnaire, coaching

Browse files

Files changed (9) hide show

bin/clean.sh +3 -1
database/mock_qna.sqlite +1 -1
notebooks/002_persisted-embedding-model.ipynb +1 -0
notebooks/007_test_hi_content_engine.ipynb +426 -0
qna_prompting.py +54 -81
raw_documents/overview_background.txt +3 -0
resource/disney-cuties-little-winnie-the-pooh-emoticon.png +0 -0
resource/disney-cuties-piglet-emoticon.png +0 -0
streamlit_app.py +79 -23

bin/clean.sh CHANGED Viewed

@@ -2,4 +2,6 @@
 find . -name __pycache__ | xargs rm -rf
 find . -name .pytest_cache | xargs rm -rf
-find . -name .ipynb_checkpoints | xargs rm -rf

 find . -name __pycache__ | xargs rm -rf
 find . -name .pytest_cache | xargs rm -rf
+find . -name .ipynb_checkpoints | xargs rm -rf
+python reset_database.py

database/mock_qna.sqlite CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8949591dc84ad447843c2741803c39e545dc11c6e39cefca75ab1416a6140e3a
 size 20480

 version https://git-lfs.github.com/spec/v1
+oid sha256:66df2080ffb3456c39b4bf554effc49266e957e63792ce12a4f11cb991f369fd
 size 20480

notebooks/002_persisted-embedding-model.ipynb CHANGED Viewed

@@ -40,6 +40,7 @@
    "source": [
     "# load some documents\n",
     "documents = SimpleDirectoryReader(input_files=[\n",
     "                                    \"../raw_documents/HI_Knowledge_Base.pdf\",\n",
     "                                    \"../raw_documents/HI Chapter Summary Version 1.3.pdf\",\n",
     "                                    \"../raw_documents/qna.txt\"\n",

    "source": [
     "# load some documents\n",
     "documents = SimpleDirectoryReader(input_files=[\n",
+    "                                    \"../raw_documents/overview_background.txt\",\n",
     "                                    \"../raw_documents/HI_Knowledge_Base.pdf\",\n",
     "                                    \"../raw_documents/HI Chapter Summary Version 1.3.pdf\",\n",
     "                                    \"../raw_documents/qna.txt\"\n",

notebooks/007_test_hi_content_engine.ipynb ADDED Viewed

	@@ -0,0 +1,426 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ac0cc1aa-e68d-432d-b316-52e272c43207",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import streamlit as st\n",
+    "from streamlit_feedback import streamlit_feedback\n",
+    "\n",
+    "import os\n",
+    "import pandas as pd\n",
+    "import base64\n",
+    "from io import BytesIO\n",
+    "import sys\n",
+    "sys.path.insert(0, \"../\")\n",
+    "\n",
+    "import chromadb\n",
+    "from llama_index.core import (\n",
+    "            VectorStoreIndex, \n",
+    "            SimpleDirectoryReader,\n",
+    "            StorageContext,\n",
+    "            Document\n",
+    ")\n",
+    "from llama_index.vector_stores.chroma.base import ChromaVectorStore\n",
+    "from llama_index.embeddings.huggingface.base import HuggingFaceEmbedding\n",
+    "from llama_index.llms.openai import OpenAI\n",
+    "from llama_index.core.memory import ChatMemoryBuffer\n",
+    "from llama_index.core.tools import QueryEngineTool\n",
+    "from llama_index.agent.openai import OpenAIAgent\n",
+    "from llama_index.core import Settings\n",
+    "\n",
+    "from vision_api import get_transcribed_text\n",
+    "from qna_prompting import get_qna_question_tool, evaluate_qna_answer_tool\n",
+    "\n",
+    "import nest_asyncio\n",
+    "nest_asyncio.apply()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8b05cb9b-869a-409c-8d4f-aafae703c558",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@st.cache_resource\n",
+    "def get_document_object(input_files):\n",
+    "    documents = SimpleDirectoryReader(input_files=input_files).load_data()\n",
+    "    document = Document(text=\"\\n\\n\".join([doc.text for doc in documents]))\n",
+    "    return document\n",
+    "\n",
+    "@st.cache_resource\n",
+    "def get_llm_object(selected_model, temperature):\n",
+    "    llm = OpenAI(model=selected_model, temperature=temperature)\n",
+    "    return llm\n",
+    "\n",
+    "@st.cache_resource\n",
+    "def get_embedding_model(model_name, fine_tuned_path=None):\n",
+    "    if fine_tuned_path is None:\n",
+    "        print(f\"loading from `{model_name}` from huggingface\")\n",
+    "        embed_model = HuggingFaceEmbedding(model_name=model_name)\n",
+    "    else:\n",
+    "        print(f\"loading from local `{fine_tuned_path}`\")\n",
+    "        embed_model = fine_tuned_path\n",
+    "    return embed_model\n",
+    "\n",
+    "@st.cache_resource\n",
+    "def get_query_engine(input_files, llm_model, temperature,\n",
+    "                     embedding_model, fine_tuned_path,\n",
+    "                     system_content, persisted_vector_db):\n",
+    "    \n",
+    "    llm = get_llm_object(llm_model, temperature)\n",
+    "    embedded_model = get_embedding_model(\n",
+    "                        model_name=embedding_model, \n",
+    "                        fine_tuned_path=fine_tuned_path\n",
+    "    )\n",
+    "    Settings.llm = llm\n",
+    "    Settings.chunk_size = 1024\n",
+    "    Settings.embed_model = embedded_model\n",
+    "\n",
+    "    if os.path.exists(persisted_vector_db):\n",
+    "        print(\"loading from vector database - chroma\")\n",
+    "        db = chromadb.PersistentClient(path=persisted_vector_db)\n",
+    "        chroma_collection = db.get_or_create_collection(\"quickstart\")\n",
+    "        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
+    "        storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
+    "\n",
+    "        index = VectorStoreIndex.from_vector_store(\n",
+    "            vector_store=vector_store,\n",
+    "            storage_context=storage_context\n",
+    "        )\n",
+    "    else:\n",
+    "        print(\"create new chroma vector database..\")\n",
+    "        documents = SimpleDirectoryReader(input_files=input_files).load_data()\n",
+    "        \n",
+    "        db = chromadb.PersistentClient(path=persisted_vector_db)\n",
+    "        chroma_collection = db.get_or_create_collection(\"quickstart\")\n",
+    "        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
+    "        \n",
+    "        nodes = Settings.node_parser.get_nodes_from_documents(documents)\n",
+    "        storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
+    "        storage_context.docstore.add_documents(nodes)\n",
+    "\n",
+    "        index = VectorStoreIndex(nodes, storage_context=storage_context)\n",
+    "    \n",
+    "    memory = ChatMemoryBuffer.from_defaults(token_limit=15000)\n",
+    "    hi_content_engine = index.as_query_engine(\n",
+    "                            memory=memory,\n",
+    "                            system_prompt=system_content,\n",
+    "                            similarity_top_k=20,\n",
+    "                            streaming=True\n",
+    "    )\n",
+    "    hi_textbook_query_description = \"\"\"\n",
+    "        Use this tool to extract content from textbook `Health Insurance 7th Edition`,\n",
+    "        that has 15 chapters in total. When user wants to learn more about a \n",
+    "        particular chapter, this tool will help to assist user to get better\n",
+    "        understanding of the content of the textbook.\n",
+    "    \"\"\"\n",
+    "    \n",
+    "    hi_query_tool = QueryEngineTool.from_defaults(\n",
+    "                        query_engine=hi_content_engine,\n",
+    "                        name=\"health_insurance_textbook_query_engine\",\n",
+    "                        description=hi_textbook_query_description\n",
+    "    )\n",
+    "\n",
+    "    agent = OpenAIAgent.from_tools(tools=[\n",
+    "                                        hi_query_tool, \n",
+    "                                        get_qna_question_tool,\n",
+    "                                        evaluate_qna_answer_tool\n",
+    "                                    ],\n",
+    "                                   max_function_calls=1,\n",
+    "                                   llm=llm, \n",
+    "                                   verbose=True,\n",
+    "                                   system_prompt=textbook_content)\n",
+    "    print(\"loaded AI agent, let's begin the chat!\")\n",
+    "    print(\"=\"*50)\n",
+    "    print(\"\")\n",
+    "\n",
+    "    return agent\n",
+    "\n",
+    "def generate_llm_response(prompt_input, tool_choice=\"auto\"):\n",
+    "    chat_agent = get_query_engine(input_files=input_files, \n",
+    "                                   llm_model=selected_model, \n",
+    "                                   temperature=temperature,\n",
+    "                                   embedding_model=embedding_model,\n",
+    "                                   fine_tuned_path=fine_tuned_path,\n",
+    "                                   system_content=system_content,\n",
+    "                                   persisted_vector_db=persisted_vector_db)\n",
+    "    \n",
+    "    # st.session_state.messages\n",
+    "    response = chat_agent.stream_chat(prompt_input, tool_choice=tool_choice)\n",
+    "    return response\n",
+    "\n",
+    "def handle_feedback(user_response):\n",
+    "    st.toast(\"✔️ Feedback received!\")\n",
+    "    st.session_state.feedback = False\n",
+    "\n",
+    "def handle_image_upload():\n",
+    "    st.session_state.release_file = \"true\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f148426b-1634-45ed-a1fa-44e9c6ab14ac",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4461d081-d8d0-4801-ad52-dbe826cbfe59",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "openai_api = os.getenv(\"OPENAI_API_KEY\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2a24c861-896b-4800-8478-73f8cd65e8fa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "image_prompt = False\n",
+    "# llm_model = \"gpt-3.5-turbo-0125\"\n",
+    "llm_model = \"gpt-4-0125-preview\"\n",
+    "temperature = 0\n",
+    "\n",
+    "input_files = [\"./raw_documents/HI Chapter Summary Version 1.3.pdf\",\n",
+    "               \"./raw_documents/qna.txt\"]\n",
+    "embedding_model = \"BAAI/bge-small-en-v1.5\"\n",
+    "persisted_vector_db = \"../models/chroma_db\"\n",
+    "fine_tuned_path = \"local:../models/fine-tuned-embeddings\"\n",
+    "system_content = (\n",
+    "                    \"You are a helpful study assistant. \"\n",
+    "                    \"You do not respond as 'User' or pretend to be 'User'. \"\n",
+    "                    \"You only respond once as 'Assistant'.\"\n",
+    ")\n",
+    "textbook_content = (\n",
+    "                    \"The content of the textbook `Health Insurance 7th Edition` are as follows,\"\n",
+    "                    \"- Chapter 1: Overview Of Healthcare Environment In Singapore\"\n",
+    "                    \"- Chapter 2: Medical Expense Insurance\"\n",
+    "                    \"- Chapter 3: Group Medical Expense Insurance\"\n",
+    "                    \"- Chapter 4: Disability Income Insurance\"\n",
+    "                    \"- Chapter 5: Long-Term Care Insurance \"\n",
+    "                    \"- Chapter 6: Critical Illness Insurance\"\n",
+    "                    \"- Chapter 7: Other Types Of Health Insurance\"\n",
+    "                    \"- Chapter 8: Managed Healthcare\"\n",
+    "                    \"- Chapter 9: Part I Healthcare Financing\"\n",
+    "                    \"- Chapter 9: Part II Healthcare Financing\"\n",
+    "                    \"- Chapter 10: Common Policy Provisions\"\n",
+    "                    \"- Chapter 11: Health Insurance Pricing\"\n",
+    "                    \"- Chapter 12: Health Insurance Underwriting\"\n",
+    "                    \"- Chapter 13: Notice No: MAS 120 Disclosure And Advisory Process - Requirements For Accident And Health Insurance Products\"\n",
+    "                    \"- Chapter 14: Financial Needs Analysis\"\n",
+    "                    \"- Chapter 15: Case Studies\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d5e4b22c-1e29-4ab8-9039-6e86f566871a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm = get_llm_object(llm_model, temperature)\n",
+    "embedded_model = get_embedding_model(\n",
+    "                    model_name=embedding_model, \n",
+    "                    fine_tuned_path=fine_tuned_path\n",
+    ")\n",
+    "Settings.llm = llm\n",
+    "Settings.chunk_size = 1024\n",
+    "Settings.embed_model = embedded_model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e92d21e3-8483-4f24-91cf-40a6c10d43c5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5753c6ed-41a6-40b5-bc4f-477eb7c1d5c5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"loading from vector database - chroma\")\n",
+    "db = chromadb.PersistentClient(path=persisted_vector_db)\n",
+    "chroma_collection = db.get_or_create_collection(\"quickstart\")\n",
+    "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
+    "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
+    "\n",
+    "index = VectorStoreIndex.from_vector_store(\n",
+    "    vector_store=vector_store,\n",
+    "    storage_context=storage_context\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d91e2dda-cb74-4d85-adce-a4a72c53cc7d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e4211bb2-aba9-4be2-b2f1-6fbd3f7e4223",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "memory = ChatMemoryBuffer.from_defaults(token_limit=15000)\n",
+    "hi_content_engine = index.as_query_engine(\n",
+    "                        memory=memory,\n",
+    "                        system_prompt=system_content,\n",
+    "                        similarity_top_k=8,\n",
+    "                        verbose=True,\n",
+    "                        streaming=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "007f8bf5-19c5-4462-b5f2-5f4ff30f593b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hi_textbook_query_description = \"\"\"\n",
+    "    Use this tool to extract content from the query engine,\n",
+    "    which is built by ingesting textbook content from `Health Insurance 7th Edition`,\n",
+    "    that has 15 chapters in total. When user wants to learn more about a \n",
+    "    particular chapter, this tool will help to assist user to get better\n",
+    "    understanding of the content of the textbook.\n",
+    "\"\"\"\n",
+    "\n",
+    "hi_query_tool = QueryEngineTool.from_defaults(\n",
+    "                    query_engine=hi_content_engine,\n",
+    "                    name=\"health_insurance_textbook_query_engine\",\n",
+    "                    description=hi_textbook_query_description\n",
+    ")\n",
+    "agent = OpenAIAgent.from_tools(tools=[\n",
+    "                                    hi_query_tool, \n",
+    "                                    get_qna_question_tool,\n",
+    "                                    evaluate_qna_answer_tool\n",
+    "                                ],\n",
+    "                               max_function_calls=1,\n",
+    "                               llm=llm, \n",
+    "                               verbose=True,\n",
+    "                               system_prompt=textbook_content)\n",
+    "\n",
+    "print(\"loaded AI agent, let's begin the chat!\")\n",
+    "print(\"=\"*50)\n",
+    "print(\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a2e42ad6-20fc-4f2e-a4ea-403e79b14ba4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c62e817e-c7c8-4f90-9e32-217fec376565",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "response = hi_content_engine.query(\"can you give me the list of chapters that `Health Insurance 7th Edition` covers\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5902ffd2-2f66-4b89-bf7f-a05e3fdeccaa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for res in response.response_gen:\n",
+    "    print(res, end=\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0e75453b-85c7-4e1c-8683-6df45a13cacb",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b97d90d-5c59-486f-863b-4aaa12ed0ea0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4584aa46-b488-4535-9d69-2736c9dad170",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "response = agent.stream_chat(\"hihi\", tool_choice=\"auto\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eff8bb8d-a2d1-428a-9c3d-193389378288",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for res in response.response_gen:\n",
+    "    print(res, end=\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b7a504af-6499-4649-8e68-2a86d415e458",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

qna_prompting.py CHANGED Viewed

@@ -7,73 +7,61 @@ import time
 db_path = "./database/mock_qna.sqlite"
 qna_question_description = """
-      Use this tool to extract the chapter number from the body of input text,
-      thereafter, chapter number will be used as a filtering criteria for
-      extracting the right questions set from database.
-      The format of the function argument looks as follow:
-        It should be in the format with `Chapter_` as prefix.
         Example 1: `Chapter_1` for first chapter
         Example 2: For chapter 12 of the textbook, you should return `Chapter_12`
         Example 3: `Chapter_5` for fifth chapter
-        Thereafter, the chapter_n argument will be passed to the function for Q&A question retrieval.
 """
 qna_answer_description = """
-      Use this tool to trigger the evaluation of user's provided input with the
-      correct answer of the Q&A question asked. When user provides answer to the
-      question asked, they can reply in natural language or giving the alphabet
-      symbol of which selected answer they think it's most reasonable.
-      The format of the function argument `user_selected_answer` looks as follow:
-        It should be in the format with character such as A, B, C and D.
         Example 1: User's answer is `a`, it means choice `A`.
         Example 2: User's answer is contextually closer to 3rd answer choice, it means `C`.
         Example 3: User says last is the answer, it means `D`.
-        Thereafter, the `user_selected_answer` argument will be passed to the
-        function for Q&A question evaluation.
 """
 class Question_Model(BaseModel):
-    chapter_n: str = \
-                Field(...,
-                      pattern=r'^Chapter_\d*$',
-                      description=(
-                          "which chapter to extract, the format of this function argumet"
-                          "is with `Chapter_` as prefix concatenated with chapter number"
-                          "in integer. For example, `Chapter_2`, `Chapter_10`."
-                          "if no chapter number specified or user requested for random question"
-                          "or user has no preference over which chapter of textbook to be tested"
-                          "return `Chapter_0`"
-                          )
                     )
 class Answer_Model(BaseModel):
-    user_selected_answer: str = \
-                          Field(...,
-                                pattern=r'^[ABCD]$',
-                                description=(
-                                    "which answer choice `A`, `B`, `C`, `D`"
-                                    "user selected. The return format should be"
-                                    "in single character such as A, B, C and D."
-                                    "if user's answer is contextually closer to a "
-                                    "particular answer choice, return the corresponding"
-                                    "alphabet A, B, C or D for the answer "
-                                    "is closest."
-                                ))
 def get_qna_question(chapter_n: str) -> str:
-    """
-      Use this tool to extract the chapter number from the body of input text,
-      thereafter, chapter number will be used as a filtering criteria for
-      extracting the right questions set from database.
-      The format of the function argument looks as follow:
-        It should be in the format with `Chapter_` as prefix.
-        Example 1: `Chapter_1` for first chapter
-        Example 2: For chapter 12 of the textbook, you should return `Chapter_12`
-        Example 3: `Chapter_5` for fifth chapter
-        Thereafter, the chapter_n argument will be passed to the function for Q&A question retrieval.
-        Once the question is retrieved from database, be reminded to ask user the question.
-    """
     con = sqlite3.connect(db_path)
     cur = con.cursor()
     filter_clause = "WHERE a.id IS NULL" if chapter_n == "Chapter_0" else f"WHERE a.id IS NULL AND chapter='{chapter_n}'"
     sql_string = """SELECT q.id, question, option_1, option_2, option_3, option_4, q.correct_answer
@@ -92,8 +80,8 @@ def get_qna_question(chapter_n: str) -> str:
     option_4 = result[5]
     c_answer = result[6]
-    qna_str  = "Question: \n" + \
-               "========= \n" + \
                 question.replace("\\n", "\n") + "\n" + \
                "A) " + option_1 + "\n" + \
                "B) " + option_2 + "\n" + \
@@ -108,31 +96,17 @@ def get_qna_question(chapter_n: str) -> str:
     return qna_str
 def evaluate_qna_answer(user_selected_answer: str) -> str:
-    """
-      Use this tool to trigger the evaluation of user's provided input with the
-      correct answer of the Q&A question asked. When user provides answer to the
-      question asked, they can reply in natural language or giving the alphabet
-      symbol of which selected answer they think it's most reasonable.
-      The format of the function argument `user_selected_answer` looks as follow:
-        It should be in the format with character such as A, B, C and D.
-        Example 1: User's answer is `a`, it means choice `A`.
-        Example 2: User's answer is contextually closer to 3rd answer choice, it means `C`.
-        Example 3: User says last is the answer, it means `D`.
-        Thereafter, the `user_selected_answer` argument will be passed to the
-        function for Q&A question evaluation.
-    """
     answer_mapping = {
         "A": 1,
         "B": 2,
         "C": 3,
-        "D": 4
     }
     num_mapping = dict((v,k) for k,v in answer_mapping.items())
-    user_answer_numeric = answer_mapping.get(user_selected_answer, None)
-    if user_answer_numeric is None:
-        raise Exception(f"User's answer can't be found: {user_selected_answer}")
     question_id = st.session_state.question_id
     qna_answer  = st.session_state.qna_answer
     qna_answer_alphabet = num_mapping[qna_answer]
@@ -148,21 +122,20 @@ def evaluate_qna_answer(user_selected_answer: str) -> str:
     con.close()
     if qna_answer == user_answer_numeric:
-        st.toast('Hooray!', icon='🎉')
-        time.sleep(0.3)
-        st.toast('Hooray!', icon='🎉')
-        time.sleep(0.3)
-        st.toast('Hooray!', icon='🎉')
         st.balloons()
     else:
-        st.toast('Omg..', icon='😅')
-        time.sleep(0.3)
-        st.toast('Omg..', icon='😅')
-        time.sleep(0.3)
-        st.toast('Omg..', icon='😅')
         st.snow()
     qna_answer_response = (
         f"Your selected answer is `{user_selected_answer}`, "
         f"but the actual answer is `{qna_answer_alphabet}`. "

 db_path = "./database/mock_qna.sqlite"
 qna_question_description = """
+    Only trigger this when user wants to be tested with a question.
+    Use this tool to extract the chapter number from the body of input text,
+    thereafter, chapter number will be used as a filtering criteria for
+    extracting the right questions set from database.
+    Thereafter, the chapter_n argument will be passed to the function for Q&A question retrieval.
+    If no chapter number specified or user requested for random question,
+    or user has no preference over which chapter of textbook to be tested,
+    set function argument `chapter_n` to be `Chapter_0`.
+"""
+qna_question_data_format = """
+    The format of the function argument `chapter_n` looks as follow:
+    It should be in the format with `Chapter_` as prefix.
         Example 1: `Chapter_1` for first chapter
         Example 2: For chapter 12 of the textbook, you should return `Chapter_12`
         Example 3: `Chapter_5` for fifth chapter
 """
 qna_answer_description = """
+    Use this tool to trigger the evaluation of user's provided input with the
+    correct answer of the Q&A question asked. When user provides answer to the
+    question asked, they can reply in natural language or giving the alphabet
+    letter of which selected choice they think it's the right answer.
+    If user's answer is not a single alphabet letter, but is contextually
+    closer to a particular answer choice, return the corresponding
+    alphabet A, B, C, D or Z for which the answer's meaning is closest to.
+    Thereafter, the `user_selected_answer` argument will be passed to the
+    function for Q&A question evaluation.
+"""
+qna_answer_data_format = """
+    The format of the function argument `user_selected_answer` looks as follow:
+        It should be in the format of single character such as `A`, `B`, `C`, `D` or `Z`.
         Example 1: User's answer is `a`, it means choice `A`.
         Example 2: User's answer is contextually closer to 3rd answer choice, it means `C`.
         Example 3: User says last is the answer, it means `D`.
+        Example 4: If user doesn't know about the answer, it means `Z`.
 """
 class Question_Model(BaseModel):
+    chapter_n: str = Field(...,
+                           pattern=r'^Chapter_\d*$',
+                           description=qna_question_data_format
                     )
 class Answer_Model(BaseModel):
+    user_selected_answer: str = Field(...,
+                                      pattern=r'^[ABCDZ]$',
+                                      description=qna_answer_data_format
+                            )
 def get_qna_question(chapter_n: str) -> str:
     con = sqlite3.connect(db_path)
     cur = con.cursor()
     filter_clause = "WHERE a.id IS NULL" if chapter_n == "Chapter_0" else f"WHERE a.id IS NULL AND chapter='{chapter_n}'"
     sql_string = """SELECT q.id, question, option_1, option_2, option_3, option_4, q.correct_answer
     option_4 = result[5]
     c_answer = result[6]
+    qna_str  = "As requested, here is the retrieved question: \n" + \
+               "============================================= \n" + \
                 question.replace("\\n", "\n") + "\n" + \
                "A) " + option_1 + "\n" + \
                "B) " + option_2 + "\n" + \
     return qna_str
 def evaluate_qna_answer(user_selected_answer: str) -> str:
     answer_mapping = {
         "A": 1,
         "B": 2,
         "C": 3,
+        "D": 4,
+        "Z": 0
     }
     num_mapping = dict((v,k) for k,v in answer_mapping.items())
+    user_answer_numeric = answer_mapping.get(user_selected_answer, 0)
     question_id = st.session_state.question_id
     qna_answer  = st.session_state.qna_answer
     qna_answer_alphabet = num_mapping[qna_answer]
     con.close()
     if qna_answer == user_answer_numeric:
+        st.toast("🍯 yummy yummy, hooray!", icon="🎉")
+        time.sleep(2)
+        st.toast("🐻💕🍯 You got it right!", icon="🎊")
+        time.sleep(2)
+        st.toast("🥇 You are amazing! 💯💯", icon="💪")
         st.balloons()
     else:
+        st.toast("🐼 Something doesn't seem right.. 🔥🏠🔥", icon="😂")
+        time.sleep(2)
+        st.toast("🥶 Are you sure..? 😬😬", icon="😭")
+        time.sleep(2)
+        st.toast("🤜🤛 Nevertheless, it was a good try!! 🏋️‍♂️🏋️‍♂️", icon="👏")
         st.snow()
     qna_answer_response = (
         f"Your selected answer is `{user_selected_answer}`, "
         f"but the actual answer is `{qna_answer_alphabet}`. "

raw_documents/overview_background.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f4a5e6e0a28727dd6eab4bc18bf5ffcf897a4dbed61a854fa52629d2698f0925
+size 5970

resource/disney-cuties-little-winnie-the-pooh-emoticon.png ADDED Viewed

resource/disney-cuties-piglet-emoticon.png ADDED Viewed

streamlit_app.py CHANGED Viewed

@@ -28,7 +28,7 @@ import nest_asyncio
 nest_asyncio.apply()
 # App title
-st.set_page_config(page_title="💬 Open AI Chatbot")
 openai_api = os.getenv("OPENAI_API_KEY")
 # "./raw_documents/HI_Knowledge_Base.pdf"
@@ -38,9 +38,29 @@ input_files = ["./raw_documents/HI Chapter Summary Version 1.3.pdf",
 embedding_model = "BAAI/bge-small-en-v1.5"
 persisted_vector_db = "./models/chroma_db"
 fine_tuned_path = "local:models/fine-tuned-embeddings"
-system_content = ("You are a helpful study assistant. "
-                  "You do not respond as 'User' or pretend to be 'User'. "
-                  "You only respond once as 'Assistant'."
 )
 data_df = pd.DataFrame(
@@ -50,10 +70,34 @@ data_df = pd.DataFrame(
 )
 data_df.index = ["Chapter 1", "Chapter 2", "Chapter 3", "Chapter 4"]
 # OpenAI credentials
 with st.sidebar:
-    st.title("💬 Open AI Chatbot")
-    st.write("This chatbot is created using the GPT model from Open AI.")
     if openai_api:
         pass
     elif "OPENAI_API_KEY" in st.secrets:
@@ -71,7 +115,7 @@ with st.sidebar:
     st.subheader("Models and parameters")
     selected_model = st.sidebar.selectbox("Choose an OpenAI model",
-                                          ["gpt-3.5-turbo-0125", "gpt-4-0125-preview"],
                                            key="selected_model")
     temperature = st.sidebar.slider("temperature", min_value=0.0, max_value=2.0,
                                     value=0.0, step=0.01)
@@ -98,7 +142,7 @@ if "init" not in st.session_state.keys():
 # Store LLM generated responses
 if "messages" not in st.session_state.keys():
     st.session_state.messages = [{"role": "assistant",
-                                  "content": "How may I assist you today?",
                                   "type": "text"}]
 if "feedback_key" not in st.session_state:
@@ -115,7 +159,7 @@ if "qna_answer" not in st.session_state:
 def clear_chat_history():
     st.session_state.messages = [{"role": "assistant",
-                                  "content": "How may I assist you today?",
                                   "type": "text"}]
     chat_engine = get_query_engine(input_files=input_files,
                                    llm_model=selected_model,
@@ -191,23 +235,25 @@ def get_query_engine(input_files, llm_model, temperature,
         index = VectorStoreIndex(nodes, storage_context=storage_context)
-    memory = ChatMemoryBuffer.from_defaults(token_limit=15000)
     hi_content_engine = index.as_query_engine(
                             memory=memory,
                             system_prompt=system_content,
-                            similarity_top_k=3,
                             streaming=True
     )
     hi_textbook_query_description = """
-        Use this tool to extract content from Health Insurance textbook
         that has 15 chapters in total. When user wants to learn more about a
         particular chapter, this tool will help to assist user to get better
-        understanding of the content of the textbook.
     """
     hi_query_tool = QueryEngineTool.from_defaults(
                         query_engine=hi_content_engine,
-                        name="vector_tool",
                         description=hi_textbook_query_description
     )
@@ -218,7 +264,8 @@ def get_query_engine(input_files, llm_model, temperature,
                                     ],
                                    max_function_calls=1,
                                    llm=llm,
-                                   verbose=True)
     print("loaded AI agent, let's begin the chat!")
     print("="*50)
     print("")
@@ -277,7 +324,12 @@ with st.sidebar:
 for message in st.session_state.messages:
     if message["role"] == "admin":
         continue
-    with st.chat_message(message["role"]):
         if message["type"] == "text":
             st.write(message["content"])
         elif message["type"] == "image":
@@ -286,11 +338,10 @@ for message in st.session_state.messages:
 # User-provided prompt
 if prompt := st.chat_input(disabled=not openai_api):
-    client = OpenAI()
     st.session_state.messages.append({"role": "user",
                                       "content": prompt,
                                       "type": "text"})
-    with st.chat_message("user"):
         st.write(prompt)
 # Retrieve text prompt from image submission
@@ -301,17 +352,22 @@ if prompt is None and \
 # Generate a new response if last message is not from assistant
 if st.session_state.messages[-1]["role"] != "assistant":
-    with st.chat_message("assistant"):
-        with st.spinner("Thinking..."):
             if image_prompt:
-                response = generate_llm_response(prompt, tool_choice="vector_tool")
                 image_prompt = False
             else:
                 response = generate_llm_response(prompt, tool_choice="auto")
             placeholder = st.empty()
             full_response = ""
             for token in response.response_gen:
-                token = token.replace("\n", "  \n")
                 full_response += token
                 placeholder.markdown(full_response)
             placeholder.markdown(full_response)

 nest_asyncio.apply()
 # App title
+st.set_page_config(page_title="🐻🍯 Study Bear")
 openai_api = os.getenv("OPENAI_API_KEY")
 # "./raw_documents/HI_Knowledge_Base.pdf"
 embedding_model = "BAAI/bge-small-en-v1.5"
 persisted_vector_db = "./models/chroma_db"
 fine_tuned_path = "local:models/fine-tuned-embeddings"
+system_content = (  # passed as system_prompt= to index.as_query_engine(...) further down
+                    "You are a helpful study assistant. "
+                    "You do not respond as 'User' or pretend to be 'User'. "
+                    "You only respond once as 'Assistant'."
+)
+# Table of contents of the textbook, passed further down as
+# `system_prompt=textbook_content`.  Adjacent string literals are joined
+# verbatim, so each entry must end with its own "\n"; without it the chapter
+# titles collapse into a single run-on line ("...Singapore- Chapter 2: ...").
+textbook_content = (
+                    "The content of the textbook `Health Insurance 7th Edition` are as follows,\n"
+                    "- Chapter 1: Overview Of Healthcare Environment In Singapore\n"
+                    "- Chapter 2: Medical Expense Insurance\n"
+                    "- Chapter 3: Group Medical Expense Insurance\n"
+                    "- Chapter 4: Disability Income Insurance\n"
+                    "- Chapter 5: Long-Term Care Insurance\n"
+                    "- Chapter 6: Critical Illness Insurance\n"
+                    "- Chapter 7: Other Types Of Health Insurance\n"
+                    "- Chapter 8: Managed Healthcare\n"
+                    "- Chapter 9: Part I Healthcare Financing\n"
+                    "- Chapter 9: Part II Healthcare Financing\n"
+                    "- Chapter 10: Common Policy Provisions\n"
+                    "- Chapter 11: Health Insurance Pricing\n"
+                    "- Chapter 12: Health Insurance Underwriting\n"
+                    "- Chapter 13: Notice No: MAS 120 Disclosure And Advisory Process - Requirements For Accident And Health Insurance Products\n"
+                    "- Chapter 14: Financial Needs Analysis\n"
+                    "- Chapter 15: Case Studies"
 )
 data_df = pd.DataFrame(
 )
 data_df.index = ["Chapter 1", "Chapter 2", "Chapter 3", "Chapter 4"]
+bear_img_path = "./resource/disney-cuties-little-winnie-the-pooh-emoticon.png"  # avatar shown on "assistant" chat messages
+piglet_img_path = "./resource/disney-cuties-piglet-emoticon.png"  # avatar shown on "user" chat messages
+introduction_line = (  # Markdown greeting used as the content of the first assistant message
+                    "Hello, my name is Winnie. I am your `Study Bear` 🐻.  \n"  # two spaces + "\n" = Markdown hard line break
+                    "Let's study together and pass the exam without worries.  \n"
+                    "As the saying goes:  \n"
+                    "> Any day spent with you is my favorite day. So, today is my new favorite day.  \n"
+                    ">  \n"
+                    "Let me know what should we study today 😉.  \n"
+                    "  \n"
+                    "The content of the textbook `Health Insurance 7th Edition` are as follows,  \n"
+                    "- Chapter 1: Overview Of Healthcare Environment In Singapore  \n"
+                    "- Chapter 2: Medical Expense Insurance  \n"
+                    "- Chapter 3: Group Medical Expense Insurance  \n"
+                    "- Chapter 4: Disability Income Insurance  \n"
+                    "- Etc ... \n"  # NOTE(review): only one space before "\n" here, unlike the lines above
+                    "  \n"
+                    "For examples, you could ask me \n"  # NOTE(review): single space before "\n"; "examples" looks like a typo -- confirm before changing user-facing text
+                    "- *How many chapters are there in textbook 'Health Insurance 7th Edition'?*  \n"
+                    "- *Can you list all the chapters by name and its number for me?*  \n"
+                    "- *Please extract the important key concept from chapter 1 into 10 bullet points*  \n"
+                    "- *Please ask me a question so that I can tell if I have enough understanding about Chapter 2*  \n"
+)
 # Replicate Credentials
 with st.sidebar:
+    st.title("🍯🐝 Study Bear 🐻💭")
+    st.write("Just like Pooh needs honey, success requires hard work – no shortcuts allowed!")
     if openai_api:
         pass
     elif "OPENAI_API_KEY" in st.secrets:
     st.subheader("Models and parameters")
     selected_model = st.sidebar.selectbox("Choose an OpenAI model",
+                                          ["gpt-4-0125-preview", "gpt-3.5-turbo-0125"],
                                            key="selected_model")
     temperature = st.sidebar.slider("temperature", min_value=0.0, max_value=2.0,
                                     value=0.0, step=0.01)
 # Store LLM generated responses
 if "messages" not in st.session_state.keys():
     st.session_state.messages = [{"role": "assistant",
+                                  "content": introduction_line,
                                   "type": "text"}]
 if "feedback_key" not in st.session_state:
 def clear_chat_history():
     st.session_state.messages = [{"role": "assistant",
+                                  "content": introduction_line,
                                   "type": "text"}]
     chat_engine = get_query_engine(input_files=input_files,
                                    llm_model=selected_model,
         index = VectorStoreIndex(nodes, storage_context=storage_context)
+    memory = ChatMemoryBuffer.from_defaults(token_limit=100_000)
     hi_content_engine = index.as_query_engine(
                             memory=memory,
                             system_prompt=system_content,
+                            similarity_top_k=10,
+                            verbose=True,
                             streaming=True
     )
     hi_textbook_query_description = """
+        Use this tool to extract content from the query engine,
+        which is built by ingesting textbook content from `Health Insurance 7th Edition`,
         that has 15 chapters in total. When user wants to learn more about a
         particular chapter, this tool will help to assist user to get better
+        understanding of the content of the textbook.
     """
     hi_query_tool = QueryEngineTool.from_defaults(
                         query_engine=hi_content_engine,
+                        name="health_insurance_textbook_query_engine",
                         description=hi_textbook_query_description
     )
                                     ],
                                    max_function_calls=1,
                                    llm=llm,
+                                   verbose=True,
+                                   system_prompt=textbook_content)
     print("loaded AI agent, let's begin the chat!")
     print("="*50)
     print("")
 for message in st.session_state.messages:
     if message["role"] == "admin":
         continue
+    elif message["role"] == "user":
+        avatar = piglet_img_path
+    elif message["role"] == "assistant":
+        avatar = bear_img_path
+    with st.chat_message(message["role"], avatar=avatar):
         if message["type"] == "text":
             st.write(message["content"])
         elif message["type"] == "image":
 # User-provided prompt
 if prompt := st.chat_input(disabled=not openai_api):
     st.session_state.messages.append({"role": "user",
                                       "content": prompt,
                                       "type": "text"})
+    with st.chat_message("user", avatar=piglet_img_path):
         st.write(prompt)
 # Retrieve text prompt from image submission
 # Generate a new response if last message is not from assistant
 if st.session_state.messages[-1]["role"] != "assistant":
+    with st.chat_message("assistant", avatar=bear_img_path):
+        with st.spinner("🧸💤 Thinking... 🐻💭"):
             if image_prompt:
+                response = generate_llm_response(
+                                prompt,
+                                tool_choice="health_insurance_textbook_query_engine"
+                            )
                 image_prompt = False
             else:
                 response = generate_llm_response(prompt, tool_choice="auto")
             placeholder = st.empty()
             full_response = ""
             for token in response.response_gen:
+                # Rewrite each streamed token before it is rendered as Markdown:
+                #   "\n" -> "  \n"  two trailing spaces force a Markdown line break
+                #   "$"  -> "\$"    presumably so a literal dollar sign is not read
+                #                   as a math delimiter by st.markdown
+                #   "\[" -> "$$"    LaTeX display-math opener -> "$$" delimiter
+                # The backslashes are spelled "\\" because "\$" and "\[" are
+                # invalid escape sequences (SyntaxWarning on Python 3.12+); the
+                # resulting strings are exactly the same as before.
+                # NOTE(review): a closing "\]" is left untouched, and a "\["
+                # that arrives split across two tokens is not rewritten --
+                # confirm whether either case matters for rendered formulas.
+                token = token.replace("\n", "  \n") \
+                             .replace("$", "\\$") \
+                             .replace("\\[", "$$")
                 full_response += token
                 placeholder.markdown(full_response)
             placeholder.markdown(full_response)