Merge branch 'dev_branch' into text_extraction
Browse files- .github/workflows/push_to_hf_space_prototype.yml +14 -13
- .vscode/launch.json +35 -0
- .vscode/tasks.json +13 -0
- code/.chainlit/translations/en-US.json +0 -229
- code/main.py +18 -23
- code/modules/chat/chat_model_loader.py +13 -0
- code/modules/chat/helpers.py +5 -0
- code/modules/config/config.yml +4 -1
- code/modules/config/constants.py +1 -1
- code/modules/dataloader/data_loader.py +1 -1
.github/workflows/push_to_hf_space_prototype.yml
CHANGED
@@ -1,20 +1,21 @@
|
|
1 |
name: Push Prototype to HuggingFace
|
2 |
|
3 |
on:
|
4 |
-
|
5 |
-
branches:
|
6 |
-
|
7 |
-
|
|
|
8 |
|
9 |
jobs:
|
10 |
-
|
11 |
runs-on: ubuntu-latest
|
12 |
steps:
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
1 |
name: Push Prototype to HuggingFace
|
2 |
|
3 |
on:
|
4 |
+
push:
|
5 |
+
branches: [dev_branch]
|
6 |
+
|
7 |
+
# run this workflow manually from the Actions tab
|
8 |
+
workflow_dispatch:
|
9 |
|
10 |
jobs:
|
11 |
+
sync-to-hub:
|
12 |
runs-on: ubuntu-latest
|
13 |
steps:
|
14 |
+
- uses: actions/checkout@v4
|
15 |
+
with:
|
16 |
+
fetch-depth: 0
|
17 |
+
lfs: true
|
18 |
+
- name: Deploy Prototype to HuggingFace
|
19 |
+
env:
|
20 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
21 |
+
run: git push https://trgardos:$HF_TOKEN@huggingface.co/spaces/dl4ds/tutor_dev dev_branch:main
|
.vscode/launch.json
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
// Use IntelliSense to learn about possible attributes.
|
3 |
+
// Hover to view descriptions of existing attributes.
|
4 |
+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
5 |
+
"version": "0.2.0",
|
6 |
+
"configurations": [
|
7 |
+
{
|
8 |
+
"name": "Python Debugger: Chainlit run main.py",
|
9 |
+
"type": "debugpy",
|
10 |
+
"request": "launch",
|
11 |
+
"program": "${workspaceFolder}/.venv/bin/chainlit",
|
12 |
+
"console": "integratedTerminal",
|
13 |
+
"args": ["run", "main.py"],
|
14 |
+
"cwd": "${workspaceFolder}/code",
|
15 |
+
"justMyCode": true
|
16 |
+
},
|
17 |
+
{ "name":"Python Debugger: Module store_manager",
|
18 |
+
"type":"debugpy",
|
19 |
+
"request":"launch",
|
20 |
+
"module":"modules.vectorstore.store_manager",
|
21 |
+
"env": {"PYTHONPATH": "${workspaceFolder}/code"},
|
22 |
+
"cwd": "${workspaceFolder}/code",
|
23 |
+
"justMyCode": true
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"name": "Python Debugger: Module data_loader",
|
27 |
+
"type": "debugpy",
|
28 |
+
"request": "launch",
|
29 |
+
"module": "modules.dataloader.data_loader",
|
30 |
+
"env": {"PYTHONPATH": "${workspaceFolder}/code"},
|
31 |
+
"cwd": "${workspaceFolder}/code",
|
32 |
+
"justMyCode": true
|
33 |
+
}
|
34 |
+
]
|
35 |
+
}
|
.vscode/tasks.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
// See https://go.microsoft.com/fwlink/?LinkId=733558
|
3 |
+
// for the documentation about the tasks.json format
|
4 |
+
"version": "2.0.0",
|
5 |
+
"tasks": [
|
6 |
+
{
|
7 |
+
"label": "echo",
|
8 |
+
"type": "shell",
|
9 |
+
"command": "echo ${workspaceFolder}; ls ${workspaceFolder}/code",
|
10 |
+
"problemMatcher": []
|
11 |
+
}
|
12 |
+
]
|
13 |
+
}
|
code/.chainlit/translations/en-US.json
DELETED
@@ -1,229 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"components": {
|
3 |
-
"atoms": {
|
4 |
-
"buttons": {
|
5 |
-
"userButton": {
|
6 |
-
"menu": {
|
7 |
-
"settings": "Settings",
|
8 |
-
"settingsKey": "S",
|
9 |
-
"APIKeys": "API Keys",
|
10 |
-
"logout": "Logout"
|
11 |
-
}
|
12 |
-
}
|
13 |
-
}
|
14 |
-
},
|
15 |
-
"molecules": {
|
16 |
-
"newChatButton": {
|
17 |
-
"newChat": "New Chat"
|
18 |
-
},
|
19 |
-
"tasklist": {
|
20 |
-
"TaskList": {
|
21 |
-
"title": "\ud83d\uddd2\ufe0f Task List",
|
22 |
-
"loading": "Loading...",
|
23 |
-
"error": "An error occurred"
|
24 |
-
}
|
25 |
-
},
|
26 |
-
"attachments": {
|
27 |
-
"cancelUpload": "Cancel upload",
|
28 |
-
"removeAttachment": "Remove attachment"
|
29 |
-
},
|
30 |
-
"newChatDialog": {
|
31 |
-
"createNewChat": "Create new chat?",
|
32 |
-
"clearChat": "This will clear the current messages and start a new chat.",
|
33 |
-
"cancel": "Cancel",
|
34 |
-
"confirm": "Confirm"
|
35 |
-
},
|
36 |
-
"settingsModal": {
|
37 |
-
"settings": "Settings",
|
38 |
-
"expandMessages": "Expand Messages",
|
39 |
-
"hideChainOfThought": "Hide Chain of Thought",
|
40 |
-
"darkMode": "Dark Mode"
|
41 |
-
},
|
42 |
-
"detailsButton": {
|
43 |
-
"using": "Using",
|
44 |
-
"used": "Used"
|
45 |
-
},
|
46 |
-
"auth": {
|
47 |
-
"authLogin": {
|
48 |
-
"title": "Login to access the app.",
|
49 |
-
"form": {
|
50 |
-
"email": "Email address",
|
51 |
-
"password": "Password",
|
52 |
-
"noAccount": "Don't have an account?",
|
53 |
-
"alreadyHaveAccount": "Already have an account?",
|
54 |
-
"signup": "Sign Up",
|
55 |
-
"signin": "Sign In",
|
56 |
-
"or": "OR",
|
57 |
-
"continue": "Continue",
|
58 |
-
"forgotPassword": "Forgot password?",
|
59 |
-
"passwordMustContain": "Your password must contain:",
|
60 |
-
"emailRequired": "email is a required field",
|
61 |
-
"passwordRequired": "password is a required field"
|
62 |
-
},
|
63 |
-
"error": {
|
64 |
-
"default": "Unable to sign in.",
|
65 |
-
"signin": "Try signing in with a different account.",
|
66 |
-
"oauthsignin": "Try signing in with a different account.",
|
67 |
-
"redirect_uri_mismatch": "The redirect URI is not matching the oauth app configuration.",
|
68 |
-
"oauthcallbackerror": "Try signing in with a different account.",
|
69 |
-
"oauthcreateaccount": "Try signing in with a different account.",
|
70 |
-
"emailcreateaccount": "Try signing in with a different account.",
|
71 |
-
"callback": "Try signing in with a different account.",
|
72 |
-
"oauthaccountnotlinked": "To confirm your identity, sign in with the same account you used originally.",
|
73 |
-
"emailsignin": "The e-mail could not be sent.",
|
74 |
-
"emailverify": "Please verify your email, a new email has been sent.",
|
75 |
-
"credentialssignin": "Sign in failed. Check the details you provided are correct.",
|
76 |
-
"sessionrequired": "Please sign in to access this page."
|
77 |
-
}
|
78 |
-
},
|
79 |
-
"authVerifyEmail": {
|
80 |
-
"almostThere": "You're almost there! We've sent an email to ",
|
81 |
-
"verifyEmailLink": "Please click on the link in that email to complete your signup.",
|
82 |
-
"didNotReceive": "Can't find the email?",
|
83 |
-
"resendEmail": "Resend email",
|
84 |
-
"goBack": "Go Back",
|
85 |
-
"emailSent": "Email sent successfully.",
|
86 |
-
"verifyEmail": "Verify your email address"
|
87 |
-
},
|
88 |
-
"providerButton": {
|
89 |
-
"continue": "Continue with {{provider}}",
|
90 |
-
"signup": "Sign up with {{provider}}"
|
91 |
-
},
|
92 |
-
"authResetPassword": {
|
93 |
-
"newPasswordRequired": "New password is a required field",
|
94 |
-
"passwordsMustMatch": "Passwords must match",
|
95 |
-
"confirmPasswordRequired": "Confirm password is a required field",
|
96 |
-
"newPassword": "New password",
|
97 |
-
"confirmPassword": "Confirm password",
|
98 |
-
"resetPassword": "Reset Password"
|
99 |
-
},
|
100 |
-
"authForgotPassword": {
|
101 |
-
"email": "Email address",
|
102 |
-
"emailRequired": "email is a required field",
|
103 |
-
"emailSent": "Please check the email address {{email}} for instructions to reset your password.",
|
104 |
-
"enterEmail": "Enter your email address and we will send you instructions to reset your password.",
|
105 |
-
"resendEmail": "Resend email",
|
106 |
-
"continue": "Continue",
|
107 |
-
"goBack": "Go Back"
|
108 |
-
}
|
109 |
-
}
|
110 |
-
},
|
111 |
-
"organisms": {
|
112 |
-
"chat": {
|
113 |
-
"history": {
|
114 |
-
"index": {
|
115 |
-
"showHistory": "Show history",
|
116 |
-
"lastInputs": "Last Inputs",
|
117 |
-
"noInputs": "Such empty...",
|
118 |
-
"loading": "Loading..."
|
119 |
-
}
|
120 |
-
},
|
121 |
-
"inputBox": {
|
122 |
-
"input": {
|
123 |
-
"placeholder": "Type your message here..."
|
124 |
-
},
|
125 |
-
"speechButton": {
|
126 |
-
"start": "Start recording",
|
127 |
-
"stop": "Stop recording"
|
128 |
-
},
|
129 |
-
"SubmitButton": {
|
130 |
-
"sendMessage": "Send message",
|
131 |
-
"stopTask": "Stop Task"
|
132 |
-
},
|
133 |
-
"UploadButton": {
|
134 |
-
"attachFiles": "Attach files"
|
135 |
-
},
|
136 |
-
"waterMark": {
|
137 |
-
"text": "Built with"
|
138 |
-
}
|
139 |
-
},
|
140 |
-
"Messages": {
|
141 |
-
"index": {
|
142 |
-
"running": "Running",
|
143 |
-
"executedSuccessfully": "executed successfully",
|
144 |
-
"failed": "failed",
|
145 |
-
"feedbackUpdated": "Feedback updated",
|
146 |
-
"updating": "Updating"
|
147 |
-
}
|
148 |
-
},
|
149 |
-
"dropScreen": {
|
150 |
-
"dropYourFilesHere": "Drop your files here"
|
151 |
-
},
|
152 |
-
"index": {
|
153 |
-
"failedToUpload": "Failed to upload",
|
154 |
-
"cancelledUploadOf": "Cancelled upload of",
|
155 |
-
"couldNotReachServer": "Could not reach the server",
|
156 |
-
"continuingChat": "Continuing previous chat"
|
157 |
-
},
|
158 |
-
"settings": {
|
159 |
-
"settingsPanel": "Settings panel",
|
160 |
-
"reset": "Reset",
|
161 |
-
"cancel": "Cancel",
|
162 |
-
"confirm": "Confirm"
|
163 |
-
}
|
164 |
-
},
|
165 |
-
"threadHistory": {
|
166 |
-
"sidebar": {
|
167 |
-
"filters": {
|
168 |
-
"FeedbackSelect": {
|
169 |
-
"feedbackAll": "Feedback: All",
|
170 |
-
"feedbackPositive": "Feedback: Positive",
|
171 |
-
"feedbackNegative": "Feedback: Negative"
|
172 |
-
},
|
173 |
-
"SearchBar": {
|
174 |
-
"search": "Search"
|
175 |
-
}
|
176 |
-
},
|
177 |
-
"DeleteThreadButton": {
|
178 |
-
"confirmMessage": "This will delete the thread as well as it's messages and elements.",
|
179 |
-
"cancel": "Cancel",
|
180 |
-
"confirm": "Confirm",
|
181 |
-
"deletingChat": "Deleting chat",
|
182 |
-
"chatDeleted": "Chat deleted"
|
183 |
-
},
|
184 |
-
"index": {
|
185 |
-
"pastChats": "Past Chats"
|
186 |
-
},
|
187 |
-
"ThreadList": {
|
188 |
-
"empty": "Empty...",
|
189 |
-
"today": "Today",
|
190 |
-
"yesterday": "Yesterday",
|
191 |
-
"previous7days": "Previous 7 days",
|
192 |
-
"previous30days": "Previous 30 days"
|
193 |
-
},
|
194 |
-
"TriggerButton": {
|
195 |
-
"closeSidebar": "Close sidebar",
|
196 |
-
"openSidebar": "Open sidebar"
|
197 |
-
}
|
198 |
-
},
|
199 |
-
"Thread": {
|
200 |
-
"backToChat": "Go back to chat",
|
201 |
-
"chatCreatedOn": "This chat was created on"
|
202 |
-
}
|
203 |
-
},
|
204 |
-
"header": {
|
205 |
-
"chat": "Chat",
|
206 |
-
"readme": "Readme"
|
207 |
-
}
|
208 |
-
}
|
209 |
-
},
|
210 |
-
"hooks": {
|
211 |
-
"useLLMProviders": {
|
212 |
-
"failedToFetchProviders": "Failed to fetch providers:"
|
213 |
-
}
|
214 |
-
},
|
215 |
-
"pages": {
|
216 |
-
"Design": {},
|
217 |
-
"Env": {
|
218 |
-
"savedSuccessfully": "Saved successfully",
|
219 |
-
"requiredApiKeys": "Required API Keys",
|
220 |
-
"requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
|
221 |
-
},
|
222 |
-
"Page": {
|
223 |
-
"notPartOfProject": "You are not part of this project."
|
224 |
-
},
|
225 |
-
"ResumeButton": {
|
226 |
-
"resumeChat": "Resume Chat"
|
227 |
-
}
|
228 |
-
}
|
229 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
code/main.py
CHANGED
@@ -17,6 +17,7 @@ from modules.chat.helpers import (
|
|
17 |
get_sources,
|
18 |
get_history_chat_resume,
|
19 |
get_history_setup_llm,
|
|
|
20 |
)
|
21 |
import copy
|
22 |
from typing import Optional
|
@@ -55,7 +56,7 @@ class Chatbot:
|
|
55 |
"""
|
56 |
self.config = config
|
57 |
|
58 |
-
def _load_config(self):
|
59 |
"""
|
60 |
Load the configuration from a YAML file.
|
61 |
"""
|
@@ -277,7 +278,7 @@ class Chatbot:
|
|
277 |
rename_dict = {"Chatbot": "AI Tutor"}
|
278 |
return rename_dict.get(orig_author, orig_author)
|
279 |
|
280 |
-
async def start(self):
|
281 |
"""
|
282 |
Start the chatbot, initialize settings widgets,
|
283 |
and display and load previous conversation if chat logging is enabled.
|
@@ -285,6 +286,12 @@ class Chatbot:
|
|
285 |
|
286 |
start_time = time.time()
|
287 |
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
await self.make_llm_settings_widgets(self.config)
|
289 |
user = cl.user_session.get("user")
|
290 |
self.user = {
|
@@ -370,25 +377,6 @@ class Chatbot:
|
|
370 |
|
371 |
answer = res.get("answer", res.get("result"))
|
372 |
|
373 |
-
if cl_data._data_layer is not None:
|
374 |
-
with cl_data._data_layer.client.step(
|
375 |
-
type="run",
|
376 |
-
name="step_info",
|
377 |
-
thread_id=cl.context.session.thread_id,
|
378 |
-
# tags=self.tags,
|
379 |
-
) as step:
|
380 |
-
|
381 |
-
step.input = {"question": user_query_dict["input"]}
|
382 |
-
|
383 |
-
step.output = {
|
384 |
-
"chat_history": res.get("chat_history"),
|
385 |
-
"context": res.get("context"),
|
386 |
-
"answer": answer,
|
387 |
-
"rephrase_prompt": res.get("rephrase_prompt"),
|
388 |
-
"qa_prompt": res.get("qa_prompt"),
|
389 |
-
}
|
390 |
-
step.metadata = self.config
|
391 |
-
|
392 |
answer_with_sources, source_elements, sources_dict = get_sources(
|
393 |
res, answer, stream=stream, view_sources=view_sources
|
394 |
)
|
@@ -425,14 +413,21 @@ class Chatbot:
|
|
425 |
elements=source_elements,
|
426 |
author=LLM,
|
427 |
actions=actions,
|
|
|
428 |
).send()
|
429 |
|
430 |
async def on_chat_resume(self, thread: ThreadDict):
|
|
|
431 |
steps = thread["steps"]
|
432 |
-
k = self.config["llm_params"][
|
|
|
|
|
433 |
conversation_list = get_history_chat_resume(steps, k, SYSTEM, LLM)
|
|
|
|
|
|
|
434 |
cl.user_session.set("memory", conversation_list)
|
435 |
-
await self.start()
|
436 |
|
437 |
@cl.oauth_callback
|
438 |
def auth_callback(
|
|
|
17 |
get_sources,
|
18 |
get_history_chat_resume,
|
19 |
get_history_setup_llm,
|
20 |
+
get_last_config,
|
21 |
)
|
22 |
import copy
|
23 |
from typing import Optional
|
|
|
56 |
"""
|
57 |
self.config = config
|
58 |
|
59 |
+
async def _load_config(self):
|
60 |
"""
|
61 |
Load the configuration from a YAML file.
|
62 |
"""
|
|
|
278 |
rename_dict = {"Chatbot": "AI Tutor"}
|
279 |
return rename_dict.get(orig_author, orig_author)
|
280 |
|
281 |
+
async def start(self, config=None):
|
282 |
"""
|
283 |
Start the chatbot, initialize settings widgets,
|
284 |
and display and load previous conversation if chat logging is enabled.
|
|
|
286 |
|
287 |
start_time = time.time()
|
288 |
|
289 |
+
self.config = (
|
290 |
+
await self._load_config() if config is None else config
|
291 |
+
) # Reload the configuration on chat resume
|
292 |
+
|
293 |
+
await self.make_llm_settings_widgets(self.config) # Reload the settings widgets
|
294 |
+
|
295 |
await self.make_llm_settings_widgets(self.config)
|
296 |
user = cl.user_session.get("user")
|
297 |
self.user = {
|
|
|
377 |
|
378 |
answer = res.get("answer", res.get("result"))
|
379 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
380 |
answer_with_sources, source_elements, sources_dict = get_sources(
|
381 |
res, answer, stream=stream, view_sources=view_sources
|
382 |
)
|
|
|
413 |
elements=source_elements,
|
414 |
author=LLM,
|
415 |
actions=actions,
|
416 |
+
metadata=self.config,
|
417 |
).send()
|
418 |
|
419 |
async def on_chat_resume(self, thread: ThreadDict):
|
420 |
+
thread_config = None
|
421 |
steps = thread["steps"]
|
422 |
+
k = self.config["llm_params"][
|
423 |
+
"memory_window"
|
424 |
+
] # on resume, always use the default memory window
|
425 |
conversation_list = get_history_chat_resume(steps, k, SYSTEM, LLM)
|
426 |
+
thread_config = get_last_config(
|
427 |
+
steps
|
428 |
+
) # TODO: Returns None for now - which causes config to be reloaded with default values
|
429 |
cl.user_session.set("memory", conversation_list)
|
430 |
+
await self.start(config=thread_config)
|
431 |
|
432 |
@cl.oauth_callback
|
433 |
def auth_callback(
|
code/modules/chat/chat_model_loader.py
CHANGED
@@ -5,6 +5,8 @@ from langchain_community.llms import LlamaCpp
|
|
5 |
import torch
|
6 |
import transformers
|
7 |
import os
|
|
|
|
|
8 |
from langchain.callbacks.manager import CallbackManager
|
9 |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
10 |
from modules.config.constants import LLAMA_PATH
|
@@ -15,6 +17,14 @@ class ChatModelLoader:
|
|
15 |
self.config = config
|
16 |
self.huggingface_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
def load_chat_model(self):
|
19 |
if self.config["llm_params"]["llm_loader"] in [
|
20 |
"gpt-3.5-turbo-1106",
|
@@ -24,6 +34,9 @@ class ChatModelLoader:
|
|
24 |
llm = ChatOpenAI(model_name=self.config["llm_params"]["llm_loader"])
|
25 |
elif self.config["llm_params"]["llm_loader"] == "local_llm":
|
26 |
n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
|
|
|
|
|
|
|
27 |
llm = LlamaCpp(
|
28 |
model_path=LLAMA_PATH,
|
29 |
n_batch=n_batch,
|
|
|
5 |
import torch
|
6 |
import transformers
|
7 |
import os
|
8 |
+
from pathlib import Path
|
9 |
+
from huggingface_hub import hf_hub_download
|
10 |
from langchain.callbacks.manager import CallbackManager
|
11 |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
12 |
from modules.config.constants import LLAMA_PATH
|
|
|
17 |
self.config = config
|
18 |
self.huggingface_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
19 |
|
20 |
+
def _verify_model_cache(self, model_cache_path):
|
21 |
+
hf_hub_download(
|
22 |
+
repo_id=self.config["llm_params"]["local_llm_params"]["repo_id"],
|
23 |
+
filename=self.config["llm_params"]["local_llm_params"]["filename"],
|
24 |
+
cache_dir=model_cache_path,
|
25 |
+
)
|
26 |
+
return str(list(Path(model_cache_path).glob("*/snapshots/*/*.gguf"))[0])
|
27 |
+
|
28 |
def load_chat_model(self):
|
29 |
if self.config["llm_params"]["llm_loader"] in [
|
30 |
"gpt-3.5-turbo-1106",
|
|
|
34 |
llm = ChatOpenAI(model_name=self.config["llm_params"]["llm_loader"])
|
35 |
elif self.config["llm_params"]["llm_loader"] == "local_llm":
|
36 |
n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
|
37 |
+
model_path = self._verify_model_cache(
|
38 |
+
self.config["llm_params"]["local_llm_params"]["model"]
|
39 |
+
)
|
40 |
llm = LlamaCpp(
|
41 |
model_path=LLAMA_PATH,
|
42 |
n_batch=n_batch,
|
code/modules/chat/helpers.py
CHANGED
@@ -162,3 +162,8 @@ def get_history_setup_llm(memory_list):
|
|
162 |
raise ValueError("Invalid message type")
|
163 |
|
164 |
return conversation_list
|
|
|
|
|
|
|
|
|
|
|
|
162 |
raise ValueError("Invalid message type")
|
163 |
|
164 |
return conversation_list
|
165 |
+
|
166 |
+
|
167 |
+
def get_last_config(steps):
|
168 |
+
# TODO: Implement this function
|
169 |
+
return None
|
code/modules/config/config.yml
CHANGED
@@ -35,6 +35,9 @@ llm_params:
|
|
35 |
temperature: 0.7 # float
|
36 |
local_llm_params:
|
37 |
temperature: 0.7 # float
|
|
|
|
|
|
|
38 |
stream: False # bool
|
39 |
pdf_reader: 'gpt' # str [llama, pymupdf, gpt]
|
40 |
|
@@ -54,4 +57,4 @@ splitter_options:
|
|
54 |
chunk_separators : ["\n\n", "\n", " ", ""] # list of strings
|
55 |
front_chunks_to_remove : null # int or None
|
56 |
last_chunks_to_remove : null # int or None
|
57 |
-
delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
|
|
|
35 |
temperature: 0.7 # float
|
36 |
local_llm_params:
|
37 |
temperature: 0.7 # float
|
38 |
+
repo_id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF' # HuggingFace repo id
|
39 |
+
filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Specific name of gguf file in the repo
|
40 |
+
pdf_reader: 'pymupdf' # str [llama, pymupdf, gpt]
|
41 |
stream: False # bool
|
42 |
pdf_reader: 'gpt' # str [llama, pymupdf, gpt]
|
43 |
|
|
|
57 |
chunk_separators : ["\n\n", "\n", " ", ""] # list of strings
|
58 |
front_chunks_to_remove : null # int or None
|
59 |
last_chunks_to_remove : null # int or None
|
60 |
+
delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
|
code/modules/config/constants.py
CHANGED
@@ -18,6 +18,6 @@ opening_message = f"Hey, What Can I Help You With?\n\nYou can me ask me question
|
|
18 |
|
19 |
# Model Paths
|
20 |
|
21 |
-
LLAMA_PATH = "../storage/models/tinyllama
|
22 |
|
23 |
RETRIEVER_HF_PATHS = {"RAGatouille": "XThomasBU/Colbert_Index"}
|
|
|
18 |
|
19 |
# Model Paths
|
20 |
|
21 |
+
LLAMA_PATH = "../storage/models/tinyllama"
|
22 |
|
23 |
RETRIEVER_HF_PATHS = {"RAGatouille": "XThomasBU/Colbert_Index"}
|
code/modules/dataloader/data_loader.py
CHANGED
@@ -98,7 +98,6 @@ class FileReader:
|
|
98 |
self.web_reader = HTMLReader()
|
99 |
self.logger.info(f"Initialized FileReader with {kind} PDF reader and HTML reader")
|
100 |
|
101 |
-
|
102 |
def extract_text_from_pdf(self, pdf_path):
|
103 |
text = ""
|
104 |
with open(pdf_path, "rb") as file:
|
@@ -315,6 +314,7 @@ class ChunkProcessor:
|
|
315 |
return
|
316 |
|
317 |
try:
|
|
|
318 |
if file_path in self.document_data:
|
319 |
self.logger.warning(f"File {file_name} already processed")
|
320 |
documents = [Document(page_content=content) for content in self.document_data[file_path].values()]
|
|
|
98 |
self.web_reader = HTMLReader()
|
99 |
self.logger.info(f"Initialized FileReader with {kind} PDF reader and HTML reader")
|
100 |
|
|
|
101 |
def extract_text_from_pdf(self, pdf_path):
|
102 |
text = ""
|
103 |
with open(pdf_path, "rb") as file:
|
|
|
314 |
return
|
315 |
|
316 |
try:
|
317 |
+
|
318 |
if file_path in self.document_data:
|
319 |
self.logger.warning(f"File {file_name} already processed")
|
320 |
documents = [Document(page_content=content) for content in self.document_data[file_path].values()]
|