MikeCraBash commited on
Commit
fc77a86
1 Parent(s): 1390311
Files changed (6) hide show
  1. .chainlit/config.toml +78 -0
  2. Dockerfile +12 -0
  3. __pycache__/app.cpython-311.pyc +0 -0
  4. app.py +111 -0
  5. chainlit.md +3 -0
  6. requirements.txt +206 -0
.chainlit/config.toml ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ # Whether to enable telemetry (default: true). No personal data is collected.
3
+ enable_telemetry = true
4
+
5
+ # List of environment variables to be provided by each user to use the app.
6
+ user_env = []
7
+
8
+ # Duration (in seconds) during which the session is saved when the connection is lost
9
+ session_timeout = 3600
10
+
11
+ # Enable third-party caching (e.g. the LangChain cache)
12
+ cache = false
13
+
14
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
15
+ # follow_symlink = false
16
+
17
+ [features]
18
+ # Show the prompt playground
19
+ prompt_playground = true
20
+
21
+ # Authorize users to upload files with messages
22
+ multi_modal = true
23
+
24
+ # Allows user to use speech to text
25
+ [features.speech_to_text]
26
+ enabled = false
27
+ # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
28
+ # language = "en-US"
29
+
30
+ [UI]
31
+ # Name of the app and chatbot.
32
+ name = "Chatbot"
33
+
34
+ # Show the readme while the conversation is empty.
35
+ show_readme_as_default = true
36
+
37
+ # Description of the app and chatbot. This is used for HTML tags.
38
+ # description = ""
39
+
40
+ # Large content is collapsed by default for a cleaner UI
41
+ default_collapse_content = true
42
+
43
+ # The default value for the expand messages settings.
44
+ default_expand_messages = false
45
+
46
+ # Hide the chain of thought details from the user in the UI.
47
+ hide_cot = false
48
+
49
+ # Link to your github repo. This will add a github button in the UI's header.
50
+ # github = ""
51
+
52
+ # Specify a CSS file that can be used to customize the user interface.
53
+ # The CSS file can be served from the public directory or via an external link.
54
+ # custom_css = "/public/test.css"
55
+
56
+ # Override default MUI light theme. (Check theme.ts)
57
+ [UI.theme.light]
58
+ #background = "#FAFAFA"
59
+ #paper = "#FFFFFF"
60
+
61
+ [UI.theme.light.primary]
62
+ #main = "#F80061"
63
+ #dark = "#980039"
64
+ #light = "#FFE7EB"
65
+
66
+ # Override default MUI dark theme. (Check theme.ts)
67
+ [UI.theme.dark]
68
+ #background = "#FAFAFA"
69
+ #paper = "#FFFFFF"
70
+
71
+ [UI.theme.dark.primary]
72
+ #main = "#F80061"
73
+ #dark = "#980039"
74
+ #light = "#FFE7EB"
75
+
76
+
77
+ [meta]
78
+ generated_by = "0.7.501"
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
# Container image for the Chainlit RAG app (serves on port 7860).
FROM python:3.9

# Run as a non-root user (required by hosts such as Hugging Face Spaces).
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Install dependencies first so this layer is cached across source changes.
# FIX: the original `COPY ./requirements.txt ~/app/requirements.txt` was broken:
# Docker COPY does NOT expand `~`, so it created a literal `~` directory and the
# subsequent `pip install -r requirements.txt` only worked because of an earlier
# redundant full-source COPY.
COPY --chown=user requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source (owned by the runtime user).
COPY --chown=user . $HOME/app

CMD ["chainlit", "run", "app.py", "--port", "7860"]
__pycache__/app.cpython-311.pyc ADDED
Binary file (4.66 kB). View file
 
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# AI MAKERSPACE PREPR
# Date: 2024-5-16
#
# Module-level RAG pipeline setup for the Chainlit app.
# NOTE(review): everything below runs at import time — it downloads the event
# PDF, embeds every chunk via the OpenAI API, and builds an in-memory Qdrant
# index — so app start-up performs real network I/O.

# Basic Imports & Setup
import os
from operator import itemgetter

from openai import AsyncOpenAI  # NOTE(review): unused in this module

# Using Chainlit for our UI
import chainlit as cl
from chainlit.prompt import Prompt, PromptMessage  # NOTE(review): unused
from chainlit.playground.providers import ChatOpenAI  # NOTE(review): unused — shadowed below

import tiktoken
from dotenv import load_dotenv
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema.output_parser import StrOutputParser  # NOTE(review): unused
from langchain.schema.runnable import RunnablePassthrough
from langchain_community.vectorstores import Qdrant
from langchain_core.prompts import ChatPromptTemplate
# FIX: this import silently shadowed the chainlit playground `ChatOpenAI`
# imported above; kept last on purpose so `ChatOpenAI` below is the LangChain
# chat model, exactly as the original code resolved it.
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings

# Read OPENAI_API_KEY (and any other secrets) from a local .env file.
load_dotenv()

# DeveloperWeek PDF, fetched via Google Drive's direct-download URL format.
file_id = "1JeA-w4kvbI3GHk9Dh_j19_Q0JUDE7hse"
direct_url = f"https://drive.google.com/uc?export=download&id={file_id}"

# Load the document.
# NOTE(review): PyMuPDFLoader normally expects a local file path — confirm the
# deployed langchain version actually accepts a URL here.
docs = PyMuPDFLoader(direct_url).load()

# FIX: the encoding was re-resolved via encoding_for_model() on every call
# (once per chunk during splitting); resolve it once at module level.
_GPT35_ENCODING = tiktoken.encoding_for_model("gpt-3.5-turbo")


def tiktoken_len(text):
    """Return the number of gpt-3.5-turbo tokens in *text*."""
    return len(_GPT35_ENCODING.encode(text))


# Split the document into ~500-token chunks with a 50-token overlap
# (both values are tuning knobs — experiment).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    length_function=tiktoken_len,
)
split_chunks = text_splitter.split_documents(docs)

# Embedding model used for both indexing and querying.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# In-memory Qdrant vector store built over the chunks; rebuilt on every start.
qdrant_vectorstore = Qdrant.from_documents(
    split_chunks,
    embedding_model,
    location=":memory:",
    collection_name="Prepr",
)

qdrant_retriever = qdrant_vectorstore.as_retriever()

# Chat model that generates the final answer.
openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")

RAG_PROMPT = """
CONTEXT:
{context}

QUERY:
{question}

Use the provided context to answer the user's query. You are a professional personal assistant for an executive professional in a high tech company. You help them plan for events and meetings.
You always review the provided event information. You can look up dates and location where event sessions take place from the document. If you do not know the answer, or cannot answer, please respond with "Insufficient data for further analysis, please try again". You end your successful responses with "Is there anything else that I can help you with?". If the user says NO, or any other negative response, then you ask "How did I do?" >>
"""

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

# LCEL chain: retrieve context for the question, then answer with the chat
# model; the retrieved context is carried through alongside the response.
retrieval_augmented_qa_chain = (
    {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
)
# Chainlit App
@cl.on_chat_start
async def start_chat():
    """Store the default model settings in the user session for this chat."""
    cl.user_session.set(
        "settings",
        {
            "model": "gpt-3.5-turbo",
            "temperature": 0,
            "max_tokens": 500,
            "top_p": 1,
            "frequency_penalty": 0,
            "presence_penalty": 0,
        },
    )
@cl.on_message
async def main(message: cl.Message):
    """Answer an incoming chat message via the RAG chain.

    FIX: the chain's `invoke` is synchronous; calling it directly inside this
    async handler blocked the event loop (freezing the UI for every connected
    user while OpenAI responded). Run it in a worker thread via
    `cl.make_async` instead.
    """
    question = message.content
    response = await cl.make_async(retrieval_augmented_qa_chain.invoke)(
        {"question": question}
    )
    answer = response["response"].content

    await cl.Message(content=answer).send()
chainlit.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # AI Makerspace Demo Day - Prepr
2
+
3
+ Welcome to Prepr, your personal preparation assistant. I can help you prepare for a conference, a meeting, or an interview. How can I help you today?
requirements.txt ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ aiohttp==3.9.5
3
+ aiosignal==1.3.1
4
+ altair==5.3.0
5
+ annotated-types==0.6.0
6
+ anyio==3.7.1
7
+ appdirs==1.4.4
8
+ argon2-cffi==23.1.0
9
+ argon2-cffi-bindings==21.2.0
10
+ arrow==1.3.0
11
+ async-lru==2.0.4
12
+ asyncer==0.0.2
13
+ attrs==23.2.0
14
+ Babel==2.14.0
15
+ backoff==2.2.1
16
+ beautifulsoup4==4.12.3
17
+ bidict==0.23.1
18
+ bleach==6.1.0
19
+ blinker==1.8.1
20
+ cachetools==5.3.3
21
+ certifi==2024.2.2
22
+ cffi==1.16.0
23
+ chainlit
24
+ charset-normalizer==3.3.2
25
+ click==8.1.7
26
+ cohere==4.37
27
+ contourpy==1.2.1
28
+ curl_cffi==0.6.2
29
+ cycler==0.12.1
30
+ dataclasses-json==0.5.14
31
+ datasets==2.19.0
32
+ defusedxml==0.7.1
33
+ Deprecated==1.2.14
34
+ dill==0.3.8
35
+ dirtyjson==1.0.8
36
+ distro==1.9.0
37
+ docker==7.0.0
38
+ docker-pycreds==0.4.0
39
+ duckduckgo_search==5.3.0
40
+ fastapi==0.100.1
41
+ fastapi-socketio==0.0.10
42
+ fastavro==1.9.4
43
+ fastjsonschema==2.19.1
44
+ filelock==3.13.4
45
+ filetype==1.2.0
46
+ fonttools==4.51.0
47
+ fqdn==1.5.1
48
+ frozenlist==1.4.1
49
+ fsspec==2024.3.1
50
+ gitdb==4.0.11
51
+ GitPython==3.1.43
52
+ googleapis-common-protos==1.63.0
53
+ grandalf==0.8
54
+ greenlet==3.0.3
55
+ grpcio==1.62.2
56
+ grpcio-tools==1.62.2
57
+ h11==0.14.0
58
+ h2==4.1.0
59
+ hpack==4.0.0
60
+ httpcore==0.17.3
61
+ httpx
62
+ huggingface-hub==0.22.2
63
+ hyperframe==6.0.1
64
+ idna==3.6
65
+ importlib-metadata==6.11.0
66
+ install==1.3.5
67
+ ipywidgets==8.1.2
68
+ isoduration==20.11.0
69
+ Jinja2==3.1.3
70
+ joblib==1.4.0
71
+ json5==0.9.25
72
+ jsonpatch==1.33
73
+ jsonpointer==2.4
74
+ jsonschema==4.21.1
75
+ jsonschema-specifications==2023.12.1
76
+ jupyter==1.0.0
77
+ jupyter-console==6.6.3
78
+ jupyter-events==0.10.0
79
+ jupyter-lsp==2.2.5
80
+ jupyter_server==2.14.0
81
+ jupyter_server_terminals==0.5.3
82
+ jupyterlab
83
+ jupyterlab_pygments==0.3.0
84
+ jupyterlab_server==2.26.0
85
+ jupyterlab_widgets==3.0.10
86
+ kiwisolver==1.4.5
87
+ langchain==0.1.17
88
+ langchain-community==0.0.36
89
+ langchain-core==0.1.50
90
+ langchain-openai==0.1.6
91
+ langchain-text-splitters==0.0.1
92
+ langchainhub==0.1.15
93
+ langsmith==0.1.48
94
+ Lazify==0.4.0
95
+ markdown-it-py==3.0.0
96
+ MarkupSafe==2.1.5
97
+ marshmallow==3.21.1
98
+ matplotlib==3.8.4
99
+ mdurl==0.1.2
100
+ mistune==3.0.2
101
+ multidict==6.0.5
102
+ multiprocess==0.70.16
103
+ mypy-extensions==1.0.0
104
+ nbclient==0.10.0
105
+ nbconvert==7.16.3
106
+ nbformat==5.10.4
107
+ networkx
108
+ nltk==3.8.1
109
+ notebook==7.1.2
110
+ notebook_shim==0.2.4
111
+ numpy==1.26.4
112
+ openai==1.25.1
113
+ opentelemetry-api==1.24.0
114
+ opentelemetry-exporter-otlp==1.24.0
115
+ opentelemetry-exporter-otlp-proto-common==1.24.0
116
+ opentelemetry-exporter-otlp-proto-grpc==1.24.0
117
+ opentelemetry-exporter-otlp-proto-http==1.24.0
118
+ opentelemetry-instrumentation==0.45b0
119
+ opentelemetry-proto==1.24.0
120
+ opentelemetry-sdk==1.24.0
121
+ opentelemetry-semantic-conventions==0.45b0
122
+ orjson==3.10.1
123
+ overrides==7.7.0
124
+ packaging==23.2
125
+ pandas==2.2.2
126
+ pandocfilters==1.5.1
127
+ pillow==10.3.0
128
+ plotly==5.22.0
129
+ portalocker==2.8.2
130
+ prometheus_client==0.20.0
131
+ protobuf==4.25.3
132
+ pyarrow==16.0.0
133
+ pyarrow-hotfix==0.6
134
+ pycparser==2.22
135
+ pydantic==2.6.4
136
+ pydantic_core==2.16.3
137
+ pydeck==0.9.0
138
+ PyJWT==2.8.0
139
+ PyMuPDF==1.24.2
140
+ PyMuPDFb==1.24.1
141
+ pyparsing==3.1.2
142
+ pypdf==4.2.0
143
+ pysbd==0.3.4
144
+ python-dotenv==1.0.0
145
+ python-engineio==4.9.0
146
+ python-graphql-client==0.4.3
147
+ python-json-logger==2.0.7
148
+ python-magic==0.4.27
149
+ python-multipart==0.0.6
150
+ python-socketio==5.11.2
151
+ pytz==2024.1
152
+ PyYAML==6.0.1
153
+ qdrant-client==1.9.1
154
+ qtconsole==5.5.1
155
+ QtPy==2.4.1
156
+ ragas==0.1.7
157
+ referencing==0.34.0
158
+ regex==2024.4.16
159
+ requests==2.31.0
160
+ rfc3339-validator==0.1.4
161
+ rfc3986-validator==0.1.1
162
+ rich==13.7.1
163
+ rpds-py==0.18.0
164
+ scikit-learn==1.4.2
165
+ scipy==1.13.0
166
+ Send2Trash==1.8.3
167
+ sentry-sdk==1.45.0
168
+ setproctitle==1.3.3
169
+ simple-websocket==1.0.0
170
+ smmap==5.0.1
171
+ sniffio==1.3.1
172
+ soupsieve==2.5
173
+ SQLAlchemy==2.0.29
174
+ starlette==0.27.0
175
+ streamlit==1.33.0
176
+ striprtf==0.0.26
177
+ syncer==2.0.3
178
+ tenacity==8.2.3
179
+ terminado==0.18.1
180
+ threadpoolctl==3.4.0
181
+ tiktoken==0.6.0
182
+ tinycss2==1.2.1
183
+ toml==0.10.2
184
+ tomli==2.0.1
185
+ toolz==0.12.1
186
+ tqdm==4.66.2
187
+ types-python-dateutil==2.9.0.20240316
188
+ types-requests==2.31.0.20240406
189
+ typing-inspect==0.9.0
190
+ tzdata==2024.1
191
+ uptrace==1.24.0
192
+ uri-template==1.3.0
193
+ urllib3==2.2.1
194
+ uvicorn==0.23.2
195
+ wandb==0.16.6
196
+ watchfiles==0.20.0
197
+ webcolors==1.13
198
+ webencodings==0.5.1
199
+ websocket-client==1.7.0
200
+ websockets==12.0
201
+ widgetsnbextension==4.0.10
202
+ wikipedia==1.4.0
203
+ wrapt==1.16.0
204
+ wsproto==1.2.0
205
+ xxhash==3.4.1
206
+ yarl==1.9.4