Spaces:

MikeCraBash
/

mikecmt

Runtime error

App Files Files Community

MikeCraBash committed on May 4, 2024

Commit

310bd34

1 Parent(s): 228c8c1

final

Browse files

Files changed (2) hide show

app.py +66 -59
requirements.txt +205 -4

app.py CHANGED Viewed

@@ -1,38 +1,85 @@
-#
 # AI MAKERSPACE MIDTERM PROJECT: META RAG CHATBOT
 # Date: 2024-5-2
 # Authors: MikeC
 # Basic Imports & Setup
 import os
 from openai import AsyncOpenAI
 # Using Chainlit for our UI
-import chainlit as cl
-from chainlit.prompt import Prompt, PromptMessage
-from chainlit.playground.providers import ChatOpenAI
 # Getting the API key from the .env file
 from dotenv import load_dotenv
 load_dotenv()
-# RAG is the Rage
-# ChatOpenAI Templates
-system_template = """You are a helpful assistant who always speaks in a pleasant tone!
-"""
-user_template = """{input}
-Think through your response step by step.
 """
-# Chainlit App
-@cl.on_chat_start  # marks a function that will be executed at the start of a user session
 async def start_chat():
     settings = {
         "model": "gpt-3.5-turbo",
@@ -42,54 +89,14 @@ async def start_chat():
         "frequency_penalty": 0,
         "presence_penalty": 0,
     }
     cl.user_session.set("settings", settings)
-@cl.on_message  # marks a function that should be run each time the chatbot receives a message from a user
 async def main(message: cl.Message):
-    settings = cl.user_session.get("settings")
-    client = AsyncOpenAI()
-    print(message.content)
-    prompt = Prompt(
-        provider=ChatOpenAI.id,
-        messages=[
-            PromptMessage(
-                role="system",
-                template=system_template,
-                formatted=system_template,
-            ),
-            PromptMessage(
-                role="user",
-                template=user_template,
-                formatted=user_template.format(input=message.content),
-            ),
-        ],
-        inputs={"input": message.content},
-        settings=settings,
-    )
-    print([m.to_openai() for m in prompt.messages])
-    msg = cl.Message(content="")
-    # Question and Answer Chatbot
-    # Call OpenAI
-    async for stream_resp in await client.chat.completions.create(
-        messages=[m.to_openai() for m in prompt.messages], stream=True, **settings
-    ):
-        token = stream_resp.choices[0].delta.content
-        if not token:
-            token = ""
-        await msg.stream_token(token)
-    # Update the prompt object with the completion
-    prompt.completion = msg.content
-    msg.prompt = prompt
-    # Send and close the message stream
     await msg.send()

 # AI MAKERSPACE MIDTERM PROJECT: META RAG CHATBOT
 # Date: 2024-5-2
 # Authors: MikeC
 # Basic Imports & Setup
 import os
 from openai import AsyncOpenAI
 # Using Chainlit for our UI
+import chainlit as cl
+from chainlit.prompt import Prompt, PromptMessage
+from chainlit.playground.providers import ChatOpenAI
 # Getting the API key from the .env file
 from dotenv import load_dotenv
 load_dotenv()
+# RAG pipeline imports and setup code
+from langchain.document_loaders import PyMuPDFLoader
+docs = PyMuPDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf").load()
+import tiktoken
+def tiktoken_len(text):
+    tokens = tiktoken.encoding_for_model("gpt-3.5-turbo").encode(
+        text,
+    )
+    return len(tokens)
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+text_splitter = RecursiveCharacterTextSplitter(  # splitter later applied to the loaded PDF documents
+    chunk_size = 200,  # measured with length_function below, i.e. in tiktoken tokens rather than characters
+    chunk_overlap = 0,
+    length_function = tiktoken_len,
+)
+split_chunks = text_splitter.split_documents(docs)
+from langchain_openai.embeddings import OpenAIEmbeddings
+embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
+from langchain_community.vectorstores import Qdrant
+qdrant_vectorstore = Qdrant.from_documents(  # build the vector store from the split chunks using embedding_model
+    split_chunks,
+    embedding_model,
+    location=":memory:",  # NOTE(review): presumably an in-process, non-persistent store rebuilt on every start — confirm
+    collection_name="MetaFin",
+)
+qdrant_retriever = qdrant_vectorstore.as_retriever()
+from langchain_openai import ChatOpenAI
+openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
+from langchain_core.prompts import ChatPromptTemplate
+RAG_PROMPT = """
+CONTEXT:
+{context}
+QUERY:
+{question}
+Use the provided context to answer the user's query. You are a professional financial expert. You always review the provided financial information.  You provide correct, substantiated answers. You may not answer the user's query unless there is a specific context in the following text. If asked about the Board of Directors, then add Mark Zuckerberg as the "Board Chair".
+If you do not know the answer, or cannot answer, please respond with "Insufficient data for further analysis, please try again". >>
 """
+rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
+from operator import itemgetter
+from langchain.schema.output_parser import StrOutputParser
+from langchain.schema.runnable import RunnablePassthrough
+retrieval_augmented_qa_chain = (  # takes {"question": ...} and yields {"response": ..., "context": ...}
+    {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}  # send the question to the retriever and keep the question itself
+    | RunnablePassthrough.assign(context=itemgetter("context"))  # NOTE(review): re-assigns "context" to the value it already holds — looks redundant, confirm
+    | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}  # fill rag_prompt, call the chat model, and pass the context through
+)
+# Chainlit App
+@cl.on_chat_start
 async def start_chat():
     settings = {
         "model": "gpt-3.5-turbo",
         "frequency_penalty": 0,
         "presence_penalty": 0,
     }
     cl.user_session.set("settings", settings)
+@cl.on_message
 async def main(message: cl.Message):
+    chainlit_question = message.content
+    #chainlit_question = "What was the total value of 'Cash and cash equivalents' as of December 31, 2023?"
+    response = retrieval_augmented_qa_chain.invoke({"question": chainlit_question})
+    chainlit_answer = response["response"].content
+    msg = cl.Message(content=chainlit_answer)
     await msg.send()

requirements.txt CHANGED Viewed

@@ -1,5 +1,206 @@
-chainlit==0.7.700
 cohere==4.37
-openai==1.3.5
-tiktoken==0.5.1
-python-dotenv==1.0.0

+aiofiles==23.2.1
+aiohttp==3.9.5
+aiosignal==1.3.1
+altair==5.3.0
+annotated-types==0.6.0
+anyio==3.7.1
+appdirs==1.4.4
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+async-lru==2.0.4
+asyncer==0.0.2
+attrs==23.2.0
+Babel==2.14.0
+backoff==2.2.1
+beautifulsoup4==4.12.3
+bidict==0.23.1
+bleach==6.1.0
+blinker==1.8.1
+cachetools==5.3.3
+certifi==2024.2.2
+cffi==1.16.0
+chainlit
+charset-normalizer==3.3.2
+click==8.1.7
 cohere==4.37
+contourpy==1.2.1
+curl_cffi==0.6.2
+cycler==0.12.1
+dataclasses-json==0.5.14
+datasets==2.19.0
+defusedxml==0.7.1
+Deprecated==1.2.14
+dill==0.3.8
+dirtyjson==1.0.8
+distro==1.9.0
+docker==7.0.0
+docker-pycreds==0.4.0
+duckduckgo_search==5.3.0
+fastapi==0.100.1
+fastapi-socketio==0.0.10
+fastavro==1.9.4
+fastjsonschema==2.19.1
+filelock==3.13.4
+filetype==1.2.0
+fonttools==4.51.0
+fqdn==1.5.1
+frozenlist==1.4.1
+fsspec==2024.3.1
+gitdb==4.0.11
+GitPython==3.1.43
+googleapis-common-protos==1.63.0
+grandalf==0.8
+greenlet==3.0.3
+grpcio==1.62.2
+grpcio-tools==1.62.2
+h11==0.14.0
+h2==4.1.0
+hpack==4.0.0
+httpcore==0.17.3
+httpx
+huggingface-hub==0.22.2
+hyperframe==6.0.1
+idna==3.6
+importlib-metadata==6.11.0
+install==1.3.5
+ipywidgets==8.1.2
+isoduration==20.11.0
+Jinja2==3.1.3
+joblib==1.4.0
+json5==0.9.25
+jsonpatch==1.33
+jsonpointer==2.4
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+jupyter==1.0.0
+jupyter-console==6.6.3
+jupyter-events==0.10.0
+jupyter-lsp==2.2.5
+jupyter_server==2.14.0
+jupyter_server_terminals==0.5.3
+jupyterlab
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.26.0
+jupyterlab_widgets==3.0.10
+kiwisolver==1.4.5
+langchain==0.1.17
+langchain-community==0.0.36
+langchain-core==0.1.50
+langchain-openai==0.1.6
+langchain-text-splitters==0.0.1
+langchainhub==0.1.15
+langsmith==0.1.48
+Lazify==0.4.0
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.21.1
+matplotlib==3.8.4
+mdurl==0.1.2
+mistune==3.0.2
+multidict==6.0.5
+multiprocess==0.70.16
+mypy-extensions==1.0.0
+nbclient==0.10.0
+nbconvert==7.16.3
+nbformat==5.10.4
+networkx
+nltk==3.8.1
+notebook==7.1.2
+notebook_shim==0.2.4
+numpy==1.26.4
+openai==1.25.1
+opentelemetry-api==1.24.0
+opentelemetry-exporter-otlp==1.24.0
+opentelemetry-exporter-otlp-proto-common==1.24.0
+opentelemetry-exporter-otlp-proto-grpc==1.24.0
+opentelemetry-exporter-otlp-proto-http==1.24.0
+opentelemetry-instrumentation==0.45b0
+opentelemetry-proto==1.24.0
+opentelemetry-sdk==1.24.0
+opentelemetry-semantic-conventions==0.45b0
+orjson==3.10.1
+overrides==7.7.0
+packaging==23.2
+pandas==2.2.2
+pandocfilters==1.5.1
+pillow==10.3.0
+plotly==5.22.0
+portalocker==2.8.2
+prometheus_client==0.20.0
+protobuf==4.25.3
+pyarrow==16.0.0
+pyarrow-hotfix==0.6
+pycparser==2.22
+pydantic==2.6.4
+pydantic_core==2.16.3
+pydeck==0.9.0
+PyJWT==2.8.0
+PyMuPDF==1.24.2
+PyMuPDFb==1.24.1
+pyparsing==3.1.2
+pypdf==4.2.0
+pysbd==0.3.4
+python-dotenv==1.0.0
+python-engineio==4.9.0
+python-graphql-client==0.4.3
+python-json-logger==2.0.7
+python-magic==0.4.27
+python-multipart==0.0.6
+python-socketio==5.11.2
+pytz==2024.1
+PyYAML==6.0.1
+qdrant-client==1.9.1
+qtconsole==5.5.1
+QtPy==2.4.1
+ragas==0.1.7
+referencing==0.34.0
+regex==2024.4.16
+requests==2.31.0
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.7.1
+rpds-py==0.18.0
+scikit-learn==1.4.2
+scipy==1.13.0
+Send2Trash==1.8.3
+sentry-sdk==1.45.0
+setproctitle==1.3.3
+simple-websocket==1.0.0
+smmap==5.0.1
+sniffio==1.3.1
+soupsieve==2.5
+SQLAlchemy==2.0.29
+starlette==0.27.0
+streamlit==1.33.0
+striprtf==0.0.26
+syncer==2.0.3
+tenacity==8.2.3
+terminado==0.18.1
+threadpoolctl==3.4.0
+tiktoken==0.6.0
+tinycss2==1.2.1
+toml==0.10.2
+tomli==2.0.1
+toolz==0.12.1
+tqdm==4.66.2
+types-python-dateutil==2.9.0.20240316
+types-requests==2.31.0.20240406
+typing-inspect==0.9.0
+tzdata==2024.1
+uptrace==1.24.0
+uri-template==1.3.0
+urllib3==2.2.1
+uvicorn==0.23.2
+wandb==0.16.6
+watchfiles==0.20.0
+webcolors==1.13
+webencodings==0.5.1
+websocket-client==1.7.0
+websockets==12.0
+widgetsnbextension==4.0.10
+wikipedia==1.4.0
+wrapt==1.16.0
+wsproto==1.2.0
+xxhash==3.4.1
+yarl==1.9.4