MikeCraBash commited on
Commit
fc77a86
1 Parent(s): 1390311
Files changed (6) hide show
  1. .chainlit/config.toml +78 -0
  2. Dockerfile +12 -0
  3. __pycache__/app.cpython-311.pyc +0 -0
  4. app.py +111 -0
  5. chainlit.md +3 -0
  6. requirements.txt +206 -0
.chainlit/config.toml ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ # Whether to enable telemetry (default: true). No personal data is collected.
3
+ enable_telemetry = true
4
+
5
+ # List of environment variables to be provided by each user to use the app.
6
+ user_env = []
7
+
8
+ # Duration (in seconds) during which the session is saved when the connection is lost
9
+ session_timeout = 3600
10
+
11
+ # Enable third-party caching (e.g. the LangChain cache)
12
+ cache = false
13
+
14
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
15
+ # follow_symlink = false
16
+
17
+ [features]
18
+ # Show the prompt playground
19
+ prompt_playground = true
20
+
21
+ # Authorize users to upload files with messages
22
+ multi_modal = true
23
+
24
+ # Allows user to use speech to text
25
+ [features.speech_to_text]
26
+ enabled = false
27
+ # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
28
+ # language = "en-US"
29
+
30
+ [UI]
31
+ # Name of the app and chatbot.
32
+ name = "Chatbot"
33
+
34
+ # Show the readme while the conversation is empty.
35
+ show_readme_as_default = true
36
+
37
+ # Description of the app and chatbot. This is used for HTML tags.
38
+ # description = ""
39
+
40
+ # Large content is collapsed by default for a cleaner UI
41
+ default_collapse_content = true
42
+
43
+ # The default value for the expand messages settings.
44
+ default_expand_messages = false
45
+
46
+ # Hide the chain of thought details from the user in the UI.
47
+ hide_cot = false
48
+
49
+ # Link to your github repo. This will add a github button in the UI's header.
50
+ # github = ""
51
+
52
+ # Specify a CSS file that can be used to customize the user interface.
53
+ # The CSS file can be served from the public directory or via an external link.
54
+ # custom_css = "/public/test.css"
55
+
56
+ # Override default MUI light theme. (Check theme.ts)
57
+ [UI.theme.light]
58
+ #background = "#FAFAFA"
59
+ #paper = "#FFFFFF"
60
+
61
+ [UI.theme.light.primary]
62
+ #main = "#F80061"
63
+ #dark = "#980039"
64
+ #light = "#FFE7EB"
65
+
66
+ # Override default MUI dark theme. (Check theme.ts)
67
+ [UI.theme.dark]
68
+ #background = "#FAFAFA"
69
+ #paper = "#FFFFFF"
70
+
71
+ [UI.theme.dark.primary]
72
+ #main = "#F80061"
73
+ #dark = "#980039"
74
+ #light = "#FFE7EB"
75
+
76
+
77
+ [meta]
78
+ generated_by = "0.7.501"
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
# Container image for the Chainlit RAG app (serves on port 7860).
FROM python:3.9

# Run as a non-root user (required by hosts such as Hugging Face Spaces).
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Install dependencies first so this layer is cached across source changes.
# FIX: the original `COPY ./requirements.txt ~/app/requirements.txt` was broken:
# Docker COPY does NOT expand `~`, so it created a literal `~` directory and the
# subsequent `pip install -r requirements.txt` only worked because of an earlier
# redundant full-source COPY.
COPY --chown=user requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source (owned by the runtime user).
COPY --chown=user . $HOME/app

CMD ["chainlit", "run", "app.py", "--port", "7860"]
__pycache__/app.cpython-311.pyc ADDED
Binary file (4.66 kB). View file
 
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# AI MAKERSPACE PREPR
# Date: 2024-5-16
#
# Module-level RAG pipeline setup for the Chainlit app.
# NOTE(review): everything below runs at import time — it downloads the event
# PDF, embeds every chunk via the OpenAI API, and builds an in-memory Qdrant
# index — so app start-up performs real network I/O.

# Basic Imports & Setup
import os
from operator import itemgetter

from openai import AsyncOpenAI  # NOTE(review): unused in this module

# Using Chainlit for our UI
import chainlit as cl
from chainlit.prompt import Prompt, PromptMessage  # NOTE(review): unused
from chainlit.playground.providers import ChatOpenAI  # NOTE(review): unused — shadowed below

import tiktoken
from dotenv import load_dotenv
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema.output_parser import StrOutputParser  # NOTE(review): unused
from langchain.schema.runnable import RunnablePassthrough
from langchain_community.vectorstores import Qdrant
from langchain_core.prompts import ChatPromptTemplate
# FIX: this import silently shadowed the chainlit playground `ChatOpenAI`
# imported above; kept last on purpose so `ChatOpenAI` below is the LangChain
# chat model, exactly as the original code resolved it.
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings

# Read OPENAI_API_KEY (and any other secrets) from a local .env file.
load_dotenv()

# DeveloperWeek PDF, fetched via Google Drive's direct-download URL format.
file_id = "1JeA-w4kvbI3GHk9Dh_j19_Q0JUDE7hse"
direct_url = f"https://drive.google.com/uc?export=download&id={file_id}"

# Load the document.
# NOTE(review): PyMuPDFLoader normally expects a local file path — confirm the
# deployed langchain version actually accepts a URL here.
docs = PyMuPDFLoader(direct_url).load()

# FIX: the encoding was re-resolved via encoding_for_model() on every call
# (once per chunk during splitting); resolve it once at module level.
_GPT35_ENCODING = tiktoken.encoding_for_model("gpt-3.5-turbo")


def tiktoken_len(text):
    """Return the number of gpt-3.5-turbo tokens in *text*."""
    return len(_GPT35_ENCODING.encode(text))


# Split the document into ~500-token chunks with a 50-token overlap
# (both values are tuning knobs — experiment).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    length_function=tiktoken_len,
)
split_chunks = text_splitter.split_documents(docs)

# Embedding model used for both indexing and querying.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# In-memory Qdrant vector store built over the chunks; rebuilt on every start.
qdrant_vectorstore = Qdrant.from_documents(
    split_chunks,
    embedding_model,
    location=":memory:",
    collection_name="Prepr",
)

qdrant_retriever = qdrant_vectorstore.as_retriever()

# Chat model that generates the final answer.
openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")

RAG_PROMPT = """
CONTEXT:
{context}

QUERY:
{question}

Use the provided context to answer the user's query. You are a professional personal assistant for an executive professional in a high tech company. You help them plan for events and meetings.
You always review the provided event information. You can look up dates and location where event sessions take place from the document. If you do not know the answer, or cannot answer, please respond with "Insufficient data for further analysis, please try again". You end your successful responses with "Is there anything else that I can help you with?". If the user says NO, or any other negative response, then you ask "How did I do?" >>
"""

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

# LCEL chain: retrieve context for the question, then answer with the chat
# model; the retrieved context is carried through alongside the response.
retrieval_augmented_qa_chain = (
    {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
)
# Chainlit App
@cl.on_chat_start
async def start_chat():
    """Store the default model settings in the user session for this chat."""
    cl.user_session.set(
        "settings",
        {
            "model": "gpt-3.5-turbo",
            "temperature": 0,
            "max_tokens": 500,
            "top_p": 1,
            "frequency_penalty": 0,
            "presence_penalty": 0,
        },
    )
@cl.on_message
async def main(message: cl.Message):
    """Answer an incoming chat message via the RAG chain.

    FIX: the chain's `invoke` is synchronous; calling it directly inside this
    async handler blocked the event loop (freezing the UI for every connected
    user while OpenAI responded). Run it in a worker thread via
    `cl.make_async` instead.
    """
    question = message.content
    response = await cl.make_async(retrieval_augmented_qa_chain.invoke)(
        {"question": question}
    )
    answer = response["response"].content

    await cl.Message(content=answer).send()
chainlit.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # AI Makerspace Demo Day - Prepr
2
+
3
+ Welcome to Prepr, your personal preparation assistant. I can help you prepare for a conference, a meeting, or an interview. How can I help you today?
requirements.txt ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ aiohttp==3.9.5
3
+ aiosignal==1.3.1
4
+ altair==5.3.0
5
+ annotated-types==0.6.0
6
+ anyio==3.7.1
7
+ appdirs==1.4.4
8
+ argon2-cffi==23.1.0
9
+ argon2-cffi-bindings==21.2.0
10
+ arrow==1.3.0
11
+ async-lru==2.0.4
12
+ asyncer==0.0.2
13
+ attrs==23.2.0
14
+ Babel==2.14.0
15
+ backoff==2.2.1
16
+ beautifulsoup4==4.12.3
17
+ bidict==0.23.1
18
+ bleach==6.1.0
19
+ blinker==1.8.1
20
+ cachetools==5.3.3
21
+ certifi==2024.2.2
22
+ cffi==1.16.0
23
+ chainlit
24
+ charset-normalizer==3.3.2
25
+ click==8.1.7
26
+ cohere==4.37
27
+ contourpy==1.2.1
28
+ curl_cffi==0.6.2
29
+ cycler==0.12.1
30
+ dataclasses-json==0.5.14
31
+ datasets==2.19.0
32
+ defusedxml==0.7.1
33
+ Deprecated==1.2.14
34
+ dill==0.3.8
35
+ dirtyjson==1.0.8
36
+ distro==1.9.0
37
+ docker==7.0.0
38
+ docker-pycreds==0.4.0
39
+ duckduckgo_search==5.3.0
40
+ fastapi==0.100.1
41
+ fastapi-socketio==0.0.10
42
+ fastavro==1.9.4
43
+ fastjsonschema==2.19.1
44
+ filelock==3.13.4
45
+ filetype==1.2.0
46
+ fonttools==4.51.0
47
+ fqdn==1.5.1
48
+ frozenlist==1.4.1
49
+ fsspec==2024.3.1
50
+ gitdb==4.0.11
51
+ GitPython==3.1.43
52
+ googleapis-common-protos==1.63.0
53
+ grandalf==0.8
54
+ greenlet==3.0.3
55
+ grpcio==1.62.2
56
+ grpcio-tools==1.62.2
57
+ h11==0.14.0
58
+ h2==4.1.0
59
+ hpack==4.0.0
60
+ httpcore==0.17.3
61
+ httpx
62
+ huggingface-hub==0.22.2
63
+ hyperframe==6.0.1
64
+ idna==3.6
65
+ importlib-metadata==6.11.0
66
+ install==1.3.5
67
+ ipywidgets==8.1.2
68
+ isoduration==20.11.0
69
+ Jinja2==3.1.3
70
+ joblib==1.4.0
71
+ json5==0.9.25
72
+ jsonpatch==1.33
73
+ jsonpointer==2.4
74
+ jsonschema==4.21.1
75
+ jsonschema-specifications==2023.12.1
76
+ jupyter==1.0.0
77
+ jupyter-console==6.6.3
78
+ jupyter-events==0.10.0
79
+ jupyter-lsp==2.2.5
80
+ jupyter_server==2.14.0
81
+ jupyter_server_terminals==0.5.3
82
+ jupyterlab
83
+ jupyterlab_pygments==0.3.0
84
+ jupyterlab_server==2.26.0
85
+ jupyterlab_widgets==3.0.10
86
+ kiwisolver==1.4.5
87
+ langchain==0.1.17
88
+ langchain-community==0.0.36
89
+ langchain-core==0.1.50
90
+ langchain-openai==0.1.6
91
+ langchain-text-splitters==0.0.1
92
+ langchainhub==0.1.15
93
+ langsmith==0.1.48
94
+ Lazify==0.4.0
95
+ markdown-it-py==3.0.0
96
+ MarkupSafe==2.1.5
97
+ marshmallow==3.21.1
98
+ matplotlib==3.8.4
99
+ mdurl==0.1.2
100
+ mistune==3.0.2
101
+ multidict==6.0.5
102
+ multiprocess==0.70.16
103
+ mypy-extensions==1.0.0
104
+ nbclient==0.10.0
105
+ nbconvert==7.16.3
106
+ nbformat==5.10.4
107
+ networkx
108
+ nltk==3.8.1
109
+ notebook==7.1.2
110
+ notebook_shim==0.2.4
111
+ numpy==1.26.4
112
+ openai==1.25.1
113
+ opentelemetry-api==1.24.0
114
+ opentelemetry-exporter-otlp==1.24.0
115
+ opentelemetry-exporter-otlp-proto-common==1.24.0
116
+ opentelemetry-exporter-otlp-proto-grpc==1.24.0
117
+ opentelemetry-exporter-otlp-proto-http==1.24.0
118
+ opentelemetry-instrumentation==0.45b0
119
+ opentelemetry-proto==1.24.0
120
+ opentelemetry-sdk==1.24.0
121
+ opentelemetry-semantic-conventions==0.45b0
122
+ orjson==3.10.1
123
+ overrides==7.7.0
124
+ packaging==23.2
125
+ pandas==2.2.2
126
+ pandocfilters==1.5.1
127
+ pillow==10.3.0
128
+ plotly==5.22.0
129
+ portalocker==2.8.2
130
+ prometheus_client==0.20.0
131
+ protobuf==4.25.3
132
+ pyarrow==16.0.0
133
+ pyarrow-hotfix==0.6
134
+ pycparser==2.22
135
+ pydantic==2.6.4
136
+ pydantic_core==2.16.3
137
+ pydeck==0.9.0
138
+ PyJWT==2.8.0
139
+ PyMuPDF==1.24.2
140
+ PyMuPDFb==1.24.1
141
+ pyparsing==3.1.2
142
+ pypdf==4.2.0
143
+ pysbd==0.3.4
144
+ python-dotenv==1.0.0
145
+ python-engineio==4.9.0
146
+ python-graphql-client==0.4.3
147
+ python-json-logger==2.0.7
148
+ python-magic==0.4.27
149
+ python-multipart==0.0.6
150
+ python-socketio==5.11.2
151
+ pytz==2024.1
152
+ PyYAML==6.0.1
153
+ qdrant-client==1.9.1
154
+ qtconsole==5.5.1
155
+ QtPy==2.4.1
156
+ ragas==0.1.7
157
+ referencing==0.34.0
158
+ regex==2024.4.16
159
+ requests==2.31.0
160
+ rfc3339-validator==0.1.4
161
+ rfc3986-validator==0.1.1
162
+ rich==13.7.1
163
+ rpds-py==0.18.0
164
+ scikit-learn==1.4.2
165
+ scipy==1.13.0
166
+ Send2Trash==1.8.3
167
+ sentry-sdk==1.45.0
168
+ setproctitle==1.3.3
169
+ simple-websocket==1.0.0
170
+ smmap==5.0.1
171
+ sniffio==1.3.1
172
+ soupsieve==2.5
173
+ SQLAlchemy==2.0.29
174
+ starlette==0.27.0
175
+ streamlit==1.33.0
176
+ striprtf==0.0.26
177
+ syncer==2.0.3
178
+ tenacity==8.2.3
179
+ terminado==0.18.1
180
+ threadpoolctl==3.4.0
181
+ tiktoken==0.6.0
182
+ tinycss2==1.2.1
183
+ toml==0.10.2
184
+ tomli==2.0.1
185
+ toolz==0.12.1
186
+ tqdm==4.66.2
187
+ types-python-dateutil==2.9.0.20240316
188
+ types-requests==2.31.0.20240406
189
+ typing-inspect==0.9.0
190
+ tzdata==2024.1
191
+ uptrace==1.24.0
192
+ uri-template==1.3.0
193
+ urllib3==2.2.1
194
+ uvicorn==0.23.2
195
+ wandb==0.16.6
196
+ watchfiles==0.20.0
197
+ webcolors==1.13
198
+ webencodings==0.5.1
199
+ websocket-client==1.7.0
200
+ websockets==12.0
201
+ widgetsnbextension==4.0.10
202
+ wikipedia==1.4.0
203
+ wrapt==1.16.0
204
+ wsproto==1.2.0
205
+ xxhash==3.4.1
206
+ yarl==1.9.4