Spaces:
Running
Running
Farid Karimli
committed on
Commit
·
527151b
1
Parent(s):
c028257
Retarget to Spring 25
Browse files- .github/workflows/code_quality_check.yml +23 -23
- .github/workflows/deploy_to_hf.yml +21 -0
- .gitignore +2 -0
- apps/ai_tutor/chainlit_app.py +8 -8
- apps/ai_tutor/config/config.yml +30 -30
- apps/ai_tutor/config/project_config.yml +43 -3
- apps/ai_tutor/public/files/students_encrypted.json +21 -1
- apps/ai_tutor/storage/data/urls.txt +1 -1
.github/workflows/code_quality_check.yml
CHANGED
@@ -2,32 +2,32 @@ name: Code Quality and Security Checks
|
|
2 |
|
3 |
on:
|
4 |
push:
|
5 |
-
branches: [
|
6 |
pull_request:
|
7 |
-
branches: [
|
8 |
|
9 |
jobs:
|
10 |
code-quality:
|
11 |
runs-on: ubuntu-latest
|
12 |
steps:
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
2 |
|
3 |
on:
|
4 |
push:
|
5 |
+
branches: [main]
|
6 |
pull_request:
|
7 |
+
branches: [main]
|
8 |
|
9 |
jobs:
|
10 |
code-quality:
|
11 |
runs-on: ubuntu-latest
|
12 |
steps:
|
13 |
+
- uses: actions/checkout@v3
|
14 |
+
|
15 |
+
- name: Set up Python
|
16 |
+
uses: actions/setup-python@v4
|
17 |
+
with:
|
18 |
+
python-version: "3.11"
|
19 |
+
|
20 |
+
- name: Install dependencies
|
21 |
+
run: |
|
22 |
+
python -m pip install --upgrade pip
|
23 |
+
pip install flake8 black bandit
|
24 |
+
|
25 |
+
- name: Run Black
|
26 |
+
run: black --check .
|
27 |
+
|
28 |
+
- name: Run Flake8
|
29 |
+
run: flake8 .
|
30 |
+
|
31 |
+
- name: Run Bandit
|
32 |
+
run: |
|
33 |
+
bandit -r .
|
.github/workflows/deploy_to_hf.yml
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Push Production to HuggingFace
|
2 |
+
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
branches: [main]
|
6 |
+
|
7 |
+
# run this workflow manually from the Actions tab
|
8 |
+
workflow_dispatch:
|
9 |
+
|
10 |
+
jobs:
|
11 |
+
sync-to-hub:
|
12 |
+
runs-on: ubuntu-latest
|
13 |
+
steps:
|
14 |
+
- uses: actions/checkout@v4
|
15 |
+
with:
|
16 |
+
fetch-depth: 0
|
17 |
+
lfs: true
|
18 |
+
- name: Deploy Production (main) to HuggingFace
|
19 |
+
env:
|
20 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
21 |
+
run: git push --force https://faridkarimli:$HF_TOKEN@huggingface.co/spaces/dl4ds/sp25_tutor
|
.gitignore
CHANGED
@@ -10,3 +10,5 @@ vectorstores/*
|
|
10 |
*.log
|
11 |
**/.files/*
|
12 |
.env
|
|
|
|
|
|
10 |
*.log
|
11 |
**/.files/*
|
12 |
.env
|
13 |
+
.venv/*
|
14 |
+
.venv
|
apps/ai_tutor/chainlit_app.py
CHANGED
@@ -239,23 +239,23 @@ class Chatbot:
|
|
239 |
print(e)
|
240 |
return [
|
241 |
cl.Starter(
|
242 |
-
label="
|
243 |
-
message="
|
244 |
icon="/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg",
|
245 |
),
|
246 |
cl.Starter(
|
247 |
-
label="
|
248 |
-
message="
|
249 |
icon="/public/assets/images/starter_icons/alarmy-svgrepo-com.svg",
|
250 |
),
|
251 |
cl.Starter(
|
252 |
-
label="
|
253 |
-
message="
|
254 |
icon="/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg",
|
255 |
),
|
256 |
cl.Starter(
|
257 |
-
label="
|
258 |
-
message="
|
259 |
icon="/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg",
|
260 |
),
|
261 |
]
|
|
|
239 |
print(e)
|
240 |
return [
|
241 |
cl.Starter(
|
242 |
+
label="What is this class about?",
|
243 |
+
message="What is this class about?",
|
244 |
icon="/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg",
|
245 |
),
|
246 |
cl.Starter(
|
247 |
+
label="What is the schedule?",
|
248 |
+
message="What is the schedule?",
|
249 |
icon="/public/assets/images/starter_icons/alarmy-svgrepo-com.svg",
|
250 |
),
|
251 |
cl.Starter(
|
252 |
+
label="Who are the instructors?",
|
253 |
+
message="Who are the instructors?",
|
254 |
icon="/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg",
|
255 |
),
|
256 |
cl.Starter(
|
257 |
+
label="Will we learn about Transformers?",
|
258 |
+
message="Will we learn about Transformers?",
|
259 |
icon="/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg",
|
260 |
),
|
261 |
]
|
apps/ai_tutor/config/config.yml
CHANGED
@@ -1,22 +1,22 @@
|
|
1 |
-
log_dir:
|
2 |
-
log_chunk_dir:
|
3 |
-
device:
|
4 |
|
5 |
vectorstore:
|
6 |
-
load_from_HF:
|
7 |
reparse_files: True # bool
|
8 |
-
data_path:
|
9 |
-
url_file_path:
|
10 |
expand_urls: True # bool
|
11 |
-
db_option
|
12 |
-
db_path
|
13 |
-
model
|
14 |
-
search_top_k
|
15 |
-
score_threshold
|
16 |
|
17 |
faiss_params: # Not used as of now
|
18 |
-
index_path:
|
19 |
-
index_type:
|
20 |
index_dimension: 384 # int
|
21 |
index_nlist: 100 # int
|
22 |
index_nprobe: 10 # int
|
@@ -24,37 +24,37 @@ vectorstore:
|
|
24 |
colbert_params:
|
25 |
index_name: "new_idx" # str
|
26 |
|
27 |
-
llm_params:
|
28 |
-
llm_arch:
|
29 |
use_history: True # bool
|
30 |
generate_follow_up: False # bool
|
31 |
memory_window: 3 # int
|
32 |
-
llm_style:
|
33 |
-
llm_loader:
|
34 |
openai_params:
|
35 |
temperature: 0.7 # float
|
36 |
local_llm_params:
|
37 |
temperature: 0.7 # float
|
38 |
-
repo_id:
|
39 |
-
filename:
|
40 |
-
model_path:
|
41 |
stream: False # bool
|
42 |
-
pdf_reader:
|
43 |
|
44 |
chat_logging:
|
45 |
log_chat: True # bool
|
46 |
-
platform:
|
47 |
callbacks: True # bool
|
48 |
|
49 |
splitter_options:
|
50 |
use_splitter: True # bool
|
51 |
-
split_by_token
|
52 |
remove_leftover_delimiters: True # bool
|
53 |
remove_chunks: False # bool
|
54 |
-
chunking_mode:
|
55 |
-
chunk_size
|
56 |
-
chunk_overlap
|
57 |
-
chunk_separators
|
58 |
-
front_chunks_to_remove
|
59 |
-
last_chunks_to_remove
|
60 |
-
delimiters_to_remove
|
|
|
1 |
+
log_dir: "storage/logs" # str
|
2 |
+
log_chunk_dir: "storage/logs/chunks" # str
|
3 |
+
device: "cpu" # str [cuda, cpu]
|
4 |
|
5 |
vectorstore:
|
6 |
+
load_from_HF: False # bool
|
7 |
reparse_files: True # bool
|
8 |
+
data_path: "storage/data" # str
|
9 |
+
url_file_path: "storage/data/urls.txt" # str
|
10 |
expand_urls: True # bool
|
11 |
+
db_option: "FAISS" # str [FAISS, Chroma, RAGatouille, RAPTOR]
|
12 |
+
db_path: "vectorstores" # str
|
13 |
+
model: "sentence-transformers/all-MiniLM-L6-v2" # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002]
|
14 |
+
search_top_k: 5 # int
|
15 |
+
score_threshold: 0.2 # float
|
16 |
|
17 |
faiss_params: # Not used as of now
|
18 |
+
index_path: "vectorstores/faiss.index" # str
|
19 |
+
index_type: "Flat" # str [Flat, HNSW, IVF]
|
20 |
index_dimension: 384 # int
|
21 |
index_nlist: 100 # int
|
22 |
index_nprobe: 10 # int
|
|
|
24 |
colbert_params:
|
25 |
index_name: "new_idx" # str
|
26 |
|
27 |
+
llm_params:
|
28 |
+
llm_arch: "langchain" # [langchain]
|
29 |
use_history: True # bool
|
30 |
generate_follow_up: False # bool
|
31 |
memory_window: 3 # int
|
32 |
+
llm_style: "Normal" # str [Normal, ELI5]
|
33 |
+
llm_loader: "gpt-4o-mini" # str [local_llm, gpt-3.5-turbo-1106, gpt-4, gpt-4o-mini]
|
34 |
openai_params:
|
35 |
temperature: 0.7 # float
|
36 |
local_llm_params:
|
37 |
temperature: 0.7 # float
|
38 |
+
repo_id: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" # HuggingFace repo id
|
39 |
+
filename: "tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Specific name of gguf file in the repo
|
40 |
+
model_path: "storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Path to the model file
|
41 |
stream: False # bool
|
42 |
+
pdf_reader: "pymupdf" # str [llama, pymupdf, gpt]
|
43 |
|
44 |
chat_logging:
|
45 |
log_chat: True # bool
|
46 |
+
platform: "literalai"
|
47 |
callbacks: True # bool
|
48 |
|
49 |
splitter_options:
|
50 |
use_splitter: True # bool
|
51 |
+
split_by_token: True # bool
|
52 |
remove_leftover_delimiters: True # bool
|
53 |
remove_chunks: False # bool
|
54 |
+
chunking_mode: "semantic" # str [fixed, semantic]
|
55 |
+
chunk_size: 1000 # int
|
56 |
+
chunk_overlap: 100 # int
|
57 |
+
chunk_separators: ["\n\n", "\n", " ", ""] # list of strings
|
58 |
+
front_chunks_to_remove: null # int or None
|
59 |
+
last_chunks_to_remove: null # int or None
|
60 |
+
delimiters_to_remove: ['\t', '\n', " ", " "] # list of strings
|
apps/ai_tutor/config/project_config.yml
CHANGED
@@ -3,15 +3,55 @@ retriever:
|
|
3 |
RAGatouille: "XThomasBU/Colbert_Index"
|
4 |
|
5 |
metadata:
|
6 |
-
metadata_links:
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
token_config:
|
10 |
cooldown_time: 60
|
11 |
regen_time: 180
|
12 |
-
tokens_left:
|
13 |
all_time_tokens_allocated: 1000000
|
14 |
|
|
|
|
|
|
|
|
|
|
|
15 |
misc:
|
16 |
github_repo: "https://github.com/edubotics-ai/edubot-core"
|
17 |
docs_website: "https://dl4ds.github.io/dl4ds_tutor/"
|
|
|
3 |
RAGatouille: "XThomasBU/Colbert_Index"
|
4 |
|
5 |
metadata:
|
6 |
+
metadata_links:
|
7 |
+
[
|
8 |
+
"https://dl4ds.github.io/sp2025/",
|
9 |
+
"https://dl4ds.github.io/sp2025/schedule/",
|
10 |
+
]
|
11 |
+
slide_base_link:
|
12 |
+
"https://dl4ds.github.io"
|
13 |
+
|
14 |
+
# Assignment base link is used to find the webpage where the assignment is described/posted
|
15 |
+
assignment_base_link: "https://tools4ds.github.io/fa2024/assignments/"
|
16 |
+
|
17 |
+
# Define content types - assignments, lectures, etc.
|
18 |
+
content_types:
|
19 |
+
- "lectures"
|
20 |
+
- "assignments"
|
21 |
+
- "discussion"
|
22 |
+
- "other"
|
23 |
+
|
24 |
+
# These need to be patterns from URLs that can be used to identify the type of content uniquely
|
25 |
+
lectures_pattern: "/lectures/"
|
26 |
+
assignments_pattern: "/assignments/"
|
27 |
+
discussion_pattern: "/discussion/"
|
28 |
+
project_pattern: "/project/"
|
29 |
+
|
30 |
+
# These are fields that can be extracted from the webpages of the course content
|
31 |
+
lecture_metadata_fields:
|
32 |
+
- "title"
|
33 |
+
- "tldr"
|
34 |
+
- "date"
|
35 |
+
- "lecture_recording"
|
36 |
+
- "suggested_readings"
|
37 |
+
|
38 |
+
assignment_metadata_fields:
|
39 |
+
- "title"
|
40 |
+
- "release_date"
|
41 |
+
- "due_date"
|
42 |
+
- "source_file"
|
43 |
|
44 |
token_config:
|
45 |
cooldown_time: 60
|
46 |
regen_time: 180
|
47 |
+
tokens_left: 50000
|
48 |
all_time_tokens_allocated: 1000000
|
49 |
|
50 |
+
content:
|
51 |
+
notebookheaders_to_split_on:
|
52 |
+
- ["##", "Section"]
|
53 |
+
- ["#", "Title"]
|
54 |
+
|
55 |
misc:
|
56 |
github_repo: "https://github.com/edubotics-ai/edubot-core"
|
57 |
docs_website: "https://dl4ds.github.io/dl4ds_tutor/"
|
apps/ai_tutor/public/files/students_encrypted.json
CHANGED
@@ -1 +1,21 @@
|
|
1 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"7810b25bef84317130e2a59da978ee716bb96f6a8a9296c051b7ad4108aa8e6a": [
|
3 |
+
"admin",
|
4 |
+
"student",
|
5 |
+
"bu"
|
6 |
+
],
|
7 |
+
"0bf8b6cca820bd8628a31d8d44a7b94fcd6d058c9d5a0c52b7ffdf01ac5ce310": [
|
8 |
+
"student",
|
9 |
+
"bu"
|
10 |
+
],
|
11 |
+
"0645db6f7b415e3b04a4fc327151c3c7bbcd25ec546ee0b3604957b571a79bc2": [
|
12 |
+
"admin",
|
13 |
+
"instructor",
|
14 |
+
"bu"
|
15 |
+
],
|
16 |
+
"a95f36e2700c554639d3522834b47733f5ed1f05c5a43d04ac2575571dd43563": [
|
17 |
+
"admin",
|
18 |
+
"instructor",
|
19 |
+
"bu"
|
20 |
+
]
|
21 |
+
}
|
apps/ai_tutor/storage/data/urls.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
https://dl4ds.github.io/
|
|
|
1 |
+
https://dl4ds.github.io/sp2025/
|