Farid Karimli committed on
Commit
527151b
·
1 Parent(s): c028257

Retarget to Spring 25

Browse files
.github/workflows/code_quality_check.yml CHANGED
@@ -2,32 +2,32 @@ name: Code Quality and Security Checks
2
 
3
  on:
4
  push:
5
- branches: [ main]
6
  pull_request:
7
- branches: [ main ]
8
 
9
  jobs:
10
  code-quality:
11
  runs-on: ubuntu-latest
12
  steps:
13
- - uses: actions/checkout@v3
14
-
15
- - name: Set up Python
16
- uses: actions/setup-python@v4
17
- with:
18
- python-version: '3.11'
19
-
20
- - name: Install dependencies
21
- run: |
22
- python -m pip install --upgrade pip
23
- pip install flake8 black bandit
24
-
25
- - name: Run Black
26
- run: black --check .
27
-
28
- - name: Run Flake8
29
- run: flake8 .
30
-
31
- - name: Run Bandit
32
- run: |
33
- bandit -r .
 
2
 
3
  on:
4
  push:
5
+ branches: [main]
6
  pull_request:
7
+ branches: [main]
8
 
9
  jobs:
10
  code-quality:
11
  runs-on: ubuntu-latest
12
  steps:
13
+ - uses: actions/checkout@v3
14
+
15
+ - name: Set up Python
16
+ uses: actions/setup-python@v4
17
+ with:
18
+ python-version: "3.11"
19
+
20
+ - name: Install dependencies
21
+ run: |
22
+ python -m pip install --upgrade pip
23
+ pip install flake8 black bandit
24
+
25
+ - name: Run Black
26
+ run: black --check .
27
+
28
+ - name: Run Flake8
29
+ run: flake8 .
30
+
31
+ - name: Run Bandit
32
+ run: |
33
+ bandit -r .
.github/workflows/deploy_to_hf.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Push Production to HuggingFace
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+
7
+ # run this workflow manually from the Actions tab
8
+ workflow_dispatch:
9
+
10
+ jobs:
11
+ sync-to-hub:
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ with:
16
+ fetch-depth: 0
17
+ lfs: true
18
+ - name: Deploy Production (main) to HuggingFace
19
+ env:
20
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
21
+ run: git push --force https://faridkarimli:[email protected]/spaces/dl4ds/sp25_tutor
.gitignore CHANGED
@@ -10,3 +10,5 @@ vectorstores/*
10
  *.log
11
  **/.files/*
12
  .env
 
 
 
10
  *.log
11
  **/.files/*
12
  .env
13
+ .venv/*
14
+ .venv
apps/ai_tutor/chainlit_app.py CHANGED
@@ -239,23 +239,23 @@ class Chatbot:
239
  print(e)
240
  return [
241
  cl.Starter(
242
- label="recording on Transformers?",
243
- message="Where can I find the recording for the lecture on Transformers?",
244
  icon="/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg",
245
  ),
246
  cl.Starter(
247
- label="where's the schedule?",
248
- message="When are the lectures? I can't find the schedule.",
249
  icon="/public/assets/images/starter_icons/alarmy-svgrepo-com.svg",
250
  ),
251
  cl.Starter(
252
- label="Due Date?",
253
- message="When is the final project due?",
254
  icon="/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg",
255
  ),
256
  cl.Starter(
257
- label="Explain backprop.",
258
- message="I didn't understand the math behind backprop, could you explain it?",
259
  icon="/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg",
260
  ),
261
  ]
 
239
  print(e)
240
  return [
241
  cl.Starter(
242
+ label="What is this class about?",
243
+ message="What is this class about?",
244
  icon="/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg",
245
  ),
246
  cl.Starter(
247
+ label="What is the schedule?",
248
+ message="What is the schedule?",
249
  icon="/public/assets/images/starter_icons/alarmy-svgrepo-com.svg",
250
  ),
251
  cl.Starter(
252
+ label="Who are the instructors?",
253
+ message="Who are the instructors?",
254
  icon="/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg",
255
  ),
256
  cl.Starter(
257
+ label="Will we learn about Transformers?",
258
+ message="Will we learn about Transformers?",
259
  icon="/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg",
260
  ),
261
  ]
apps/ai_tutor/config/config.yml CHANGED
@@ -1,22 +1,22 @@
1
- log_dir: 'storage/logs' # str
2
- log_chunk_dir: 'storage/logs/chunks' # str
3
- device: 'cpu' # str [cuda, cpu]
4
 
5
  vectorstore:
6
- load_from_HF: True # bool
7
  reparse_files: True # bool
8
- data_path: 'storage/data' # str
9
- url_file_path: 'storage/data/urls.txt' # str
10
  expand_urls: True # bool
11
- db_option : 'RAGatouille' # str [FAISS, Chroma, RAGatouille, RAPTOR]
12
- db_path : 'vectorstores' # str
13
- model : 'sentence-transformers/all-MiniLM-L6-v2' # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002']
14
- search_top_k : 3 # int
15
- score_threshold : 0.2 # float
16
 
17
  faiss_params: # Not used as of now
18
- index_path: 'vectorstores/faiss.index' # str
19
- index_type: 'Flat' # str [Flat, HNSW, IVF]
20
  index_dimension: 384 # int
21
  index_nlist: 100 # int
22
  index_nprobe: 10 # int
@@ -24,37 +24,37 @@ vectorstore:
24
  colbert_params:
25
  index_name: "new_idx" # str
26
 
27
- llm_params:
28
- llm_arch: 'langchain' # [langchain]
29
  use_history: True # bool
30
  generate_follow_up: False # bool
31
  memory_window: 3 # int
32
- llm_style: 'Normal' # str [Normal, ELI5]
33
- llm_loader: 'gpt-4o-mini' # str [local_llm, gpt-3.5-turbo-1106, gpt-4, gpt-4o-mini]
34
  openai_params:
35
  temperature: 0.7 # float
36
  local_llm_params:
37
  temperature: 0.7 # float
38
- repo_id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF' # HuggingFace repo id
39
- filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Specific name of gguf file in the repo
40
- model_path: 'storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Path to the model file
41
  stream: False # bool
42
- pdf_reader: 'gpt' # str [llama, pymupdf, gpt]
43
 
44
  chat_logging:
45
  log_chat: True # bool
46
- platform: 'literalai'
47
  callbacks: True # bool
48
 
49
  splitter_options:
50
  use_splitter: True # bool
51
- split_by_token : True # bool
52
  remove_leftover_delimiters: True # bool
53
  remove_chunks: False # bool
54
- chunking_mode: 'semantic' # str [fixed, semantic]
55
- chunk_size : 300 # int
56
- chunk_overlap : 30 # int
57
- chunk_separators : ["\n\n", "\n", " ", ""] # list of strings
58
- front_chunks_to_remove : null # int or None
59
- last_chunks_to_remove : null # int or None
60
- delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
 
1
+ log_dir: "storage/logs" # str
2
+ log_chunk_dir: "storage/logs/chunks" # str
3
+ device: "cpu" # str [cuda, cpu]
4
 
5
  vectorstore:
6
+ load_from_HF: False # bool
7
  reparse_files: True # bool
8
+ data_path: "storage/data" # str
9
+ url_file_path: "storage/data/urls.txt" # str
10
  expand_urls: True # bool
11
+ db_option: "FAISS" # str [FAISS, Chroma, RAGatouille, RAPTOR]
12
+ db_path: "vectorstores" # str
13
+ model: "sentence-transformers/all-MiniLM-L6-v2" # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002]
14
+ search_top_k: 5 # int
15
+ score_threshold: 0.2 # float
16
 
17
  faiss_params: # Not used as of now
18
+ index_path: "vectorstores/faiss.index" # str
19
+ index_type: "Flat" # str [Flat, HNSW, IVF]
20
  index_dimension: 384 # int
21
  index_nlist: 100 # int
22
  index_nprobe: 10 # int
 
24
  colbert_params:
25
  index_name: "new_idx" # str
26
 
27
+ llm_params:
28
+ llm_arch: "langchain" # [langchain]
29
  use_history: True # bool
30
  generate_follow_up: False # bool
31
  memory_window: 3 # int
32
+ llm_style: "Normal" # str [Normal, ELI5]
33
+ llm_loader: "gpt-4o-mini" # str [local_llm, gpt-3.5-turbo-1106, gpt-4, gpt-4o-mini]
34
  openai_params:
35
  temperature: 0.7 # float
36
  local_llm_params:
37
  temperature: 0.7 # float
38
+ repo_id: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" # HuggingFace repo id
39
+ filename: "tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Specific name of gguf file in the repo
40
+ model_path: "storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Path to the model file
41
  stream: False # bool
42
+ pdf_reader: "pymupdf" # str [llama, pymupdf, gpt]
43
 
44
  chat_logging:
45
  log_chat: True # bool
46
+ platform: "literalai"
47
  callbacks: True # bool
48
 
49
  splitter_options:
50
  use_splitter: True # bool
51
+ split_by_token: True # bool
52
  remove_leftover_delimiters: True # bool
53
  remove_chunks: False # bool
54
+ chunking_mode: "semantic" # str [fixed, semantic]
55
+ chunk_size: 1000 # int
56
+ chunk_overlap: 100 # int
57
+ chunk_separators: ["\n\n", "\n", " ", ""] # list of strings
58
+ front_chunks_to_remove: null # int or None
59
+ last_chunks_to_remove: null # int or None
60
+ delimiters_to_remove: ['\t', '\n', " ", " "] # list of strings
apps/ai_tutor/config/project_config.yml CHANGED
@@ -3,15 +3,55 @@ retriever:
3
  RAGatouille: "XThomasBU/Colbert_Index"
4
 
5
  metadata:
6
- metadata_links: ["https://dl4ds.github.io/sp2024/lectures/", "https://dl4ds.github.io/sp2024/schedule/"]
7
- slide_base_link: "https://dl4ds.github.io"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  token_config:
10
  cooldown_time: 60
11
  regen_time: 180
12
- tokens_left: 2000
13
  all_time_tokens_allocated: 1000000
14
 
 
 
 
 
 
15
  misc:
16
  github_repo: "https://github.com/edubotics-ai/edubot-core"
17
  docs_website: "https://dl4ds.github.io/dl4ds_tutor/"
 
3
  RAGatouille: "XThomasBU/Colbert_Index"
4
 
5
  metadata:
6
+ metadata_links:
7
+ [
8
+ "https://dl4ds.github.io/sp2025/",
9
+ "https://dl4ds.github.io/sp2025/schedule/",
10
+ ]
11
+ slide_base_link:
12
+ "https://dl4ds.github.io"
13
+
14
+ # Assignment base link is used to find the webpage where the assignment is described/posted
15
+ assignment_base_link: "https://tools4ds.github.io/fa2024/assignments/"
16
+
17
+ # Define content types - assignments, lectures, etc.
18
+ content_types:
19
+ - "lectures"
20
+ - "assignments"
21
+ - "discussion"
22
+ - "other"
23
+
24
+ # These need to be patterns from URLs that can be used to identify the type of content uniquely
25
+ lectures_pattern: "/lectures/"
26
+ assignments_pattern: "/assignments/"
27
+ discussion_pattern: "/discussion/"
28
+ project_pattern: "/project/"
29
+
30
+ # These are fields that can be extracted from the webpages of the course content
31
+ lecture_metadata_fields:
32
+ - "title"
33
+ - "tldr"
34
+ - "date"
35
+ - "lecture_recording"
36
+ - "suggested_readings"
37
+
38
+ assignment_metadata_fields:
39
+ - "title"
40
+ - "release_date"
41
+ - "due_date"
42
+ - "source_file"
43
 
44
  token_config:
45
  cooldown_time: 60
46
  regen_time: 180
47
+ tokens_left: 50000
48
  all_time_tokens_allocated: 1000000
49
 
50
+ content:
51
+ notebookheaders_to_split_on:
52
+ - ["##", "Section"]
53
+ - ["#", "Title"]
54
+
55
  misc:
56
  github_repo: "https://github.com/edubotics-ai/edubot-core"
57
  docs_website: "https://dl4ds.github.io/dl4ds_tutor/"
apps/ai_tutor/public/files/students_encrypted.json CHANGED
@@ -1 +1,21 @@
1
- {"0645db6f7b415e3b04a4fc327151c3c7bbcd25ec546ee0b3604957b571a79bc2": ["instructor", "bu"], "51ebf87ac51618300acfef8bfa9768fdee40e2d3f39cfb4ae8a76722ee336de4": ["admin", "instructor", "bu"], "7810b25bef84317130e2a59da978ee716bb96f6a8a9296c051b7ad4108aa8e6a": ["instructor", "bu"], "a95f36e2700c554639d3522834b47733f5ed1f05c5a43d04ac2575571dd43563": ["student", "bu"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "7810b25bef84317130e2a59da978ee716bb96f6a8a9296c051b7ad4108aa8e6a": [
3
+ "admin",
4
+ "student",
5
+ "bu"
6
+ ],
7
+ "0bf8b6cca820bd8628a31d8d44a7b94fcd6d058c9d5a0c52b7ffdf01ac5ce310": [
8
+ "student",
9
+ "bu"
10
+ ],
11
+ "0645db6f7b415e3b04a4fc327151c3c7bbcd25ec546ee0b3604957b571a79bc2": [
12
+ "admin",
13
+ "instructor",
14
+ "bu"
15
+ ],
16
+ "a95f36e2700c554639d3522834b47733f5ed1f05c5a43d04ac2575571dd43563": [
17
+ "admin",
18
+ "instructor",
19
+ "bu"
20
+ ]
21
+ }
apps/ai_tutor/storage/data/urls.txt CHANGED
@@ -1 +1 @@
1
- https://dl4ds.github.io/sp2024/
 
1
+ https://dl4ds.github.io/sp2025/