Farid Karimli committed on
Commit
527151b
·
1 Parent(s): c028257

Retarget to Spring 25

Browse files
.github/workflows/code_quality_check.yml CHANGED
@@ -2,32 +2,32 @@ name: Code Quality and Security Checks
2
 
3
  on:
4
  push:
5
- branches: [ main]
6
  pull_request:
7
- branches: [ main ]
8
 
9
  jobs:
10
  code-quality:
11
  runs-on: ubuntu-latest
12
  steps:
13
- - uses: actions/checkout@v3
14
-
15
- - name: Set up Python
16
- uses: actions/setup-python@v4
17
- with:
18
- python-version: '3.11'
19
-
20
- - name: Install dependencies
21
- run: |
22
- python -m pip install --upgrade pip
23
- pip install flake8 black bandit
24
-
25
- - name: Run Black
26
- run: black --check .
27
-
28
- - name: Run Flake8
29
- run: flake8 .
30
-
31
- - name: Run Bandit
32
- run: |
33
- bandit -r .
 
2
 
3
  on:
4
  push:
5
+ branches: [main]
6
  pull_request:
7
+ branches: [main]
8
 
9
  jobs:
10
  code-quality:
11
  runs-on: ubuntu-latest
12
  steps:
13
+ - uses: actions/checkout@v3
14
+
15
+ - name: Set up Python
16
+ uses: actions/setup-python@v4
17
+ with:
18
+ python-version: "3.11"
19
+
20
+ - name: Install dependencies
21
+ run: |
22
+ python -m pip install --upgrade pip
23
+ pip install flake8 black bandit
24
+
25
+ - name: Run Black
26
+ run: black --check .
27
+
28
+ - name: Run Flake8
29
+ run: flake8 .
30
+
31
+ - name: Run Bandit
32
+ run: |
33
+ bandit -r .
.github/workflows/deploy_to_hf.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Push Production to HuggingFace
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+
7
+ # run this workflow manually from the Actions tab
8
+ workflow_dispatch:
9
+
10
+ jobs:
11
+ sync-to-hub:
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ with:
16
+ fetch-depth: 0
17
+ lfs: true
18
+ - name: Deploy Production (main) to HuggingFace
19
+ env:
20
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
21
+ run: git push --force https://faridkarimli:[email protected]/spaces/dl4ds/sp25_tutor
.gitignore CHANGED
@@ -10,3 +10,5 @@ vectorstores/*
10
  *.log
11
  **/.files/*
12
  .env
 
 
 
10
  *.log
11
  **/.files/*
12
  .env
13
+ .venv/*
14
+ .venv
apps/ai_tutor/chainlit_app.py CHANGED
@@ -239,23 +239,23 @@ class Chatbot:
239
  print(e)
240
  return [
241
  cl.Starter(
242
- label="recording on Transformers?",
243
- message="Where can I find the recording for the lecture on Transformers?",
244
  icon="/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg",
245
  ),
246
  cl.Starter(
247
- label="where's the schedule?",
248
- message="When are the lectures? I can't find the schedule.",
249
  icon="/public/assets/images/starter_icons/alarmy-svgrepo-com.svg",
250
  ),
251
  cl.Starter(
252
- label="Due Date?",
253
- message="When is the final project due?",
254
  icon="/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg",
255
  ),
256
  cl.Starter(
257
- label="Explain backprop.",
258
- message="I didn't understand the math behind backprop, could you explain it?",
259
  icon="/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg",
260
  ),
261
  ]
 
239
  print(e)
240
  return [
241
  cl.Starter(
242
+ label="What is this class about?",
243
+ message="What is this class about?",
244
  icon="/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg",
245
  ),
246
  cl.Starter(
247
+ label="What is the schedule?",
248
+ message="What is the schedule?",
249
  icon="/public/assets/images/starter_icons/alarmy-svgrepo-com.svg",
250
  ),
251
  cl.Starter(
252
+ label="Who are the instructors?",
253
+ message="Who are the instructors?",
254
  icon="/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg",
255
  ),
256
  cl.Starter(
257
+ label="Will we learn about Transformers?",
258
+ message="Will we learn about Transformers?",
259
  icon="/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg",
260
  ),
261
  ]
apps/ai_tutor/config/config.yml CHANGED
@@ -1,22 +1,22 @@
1
- log_dir: 'storage/logs' # str
2
- log_chunk_dir: 'storage/logs/chunks' # str
3
- device: 'cpu' # str [cuda, cpu]
4
 
5
  vectorstore:
6
- load_from_HF: True # bool
7
  reparse_files: True # bool
8
- data_path: 'storage/data' # str
9
- url_file_path: 'storage/data/urls.txt' # str
10
  expand_urls: True # bool
11
- db_option : 'RAGatouille' # str [FAISS, Chroma, RAGatouille, RAPTOR]
12
- db_path : 'vectorstores' # str
13
- model : 'sentence-transformers/all-MiniLM-L6-v2' # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002']
14
- search_top_k : 3 # int
15
- score_threshold : 0.2 # float
16
 
17
  faiss_params: # Not used as of now
18
- index_path: 'vectorstores/faiss.index' # str
19
- index_type: 'Flat' # str [Flat, HNSW, IVF]
20
  index_dimension: 384 # int
21
  index_nlist: 100 # int
22
  index_nprobe: 10 # int
@@ -24,37 +24,37 @@ vectorstore:
24
  colbert_params:
25
  index_name: "new_idx" # str
26
 
27
- llm_params:
28
- llm_arch: 'langchain' # [langchain]
29
  use_history: True # bool
30
  generate_follow_up: False # bool
31
  memory_window: 3 # int
32
- llm_style: 'Normal' # str [Normal, ELI5]
33
- llm_loader: 'gpt-4o-mini' # str [local_llm, gpt-3.5-turbo-1106, gpt-4, gpt-4o-mini]
34
  openai_params:
35
  temperature: 0.7 # float
36
  local_llm_params:
37
  temperature: 0.7 # float
38
- repo_id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF' # HuggingFace repo id
39
- filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Specific name of gguf file in the repo
40
- model_path: 'storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Path to the model file
41
  stream: False # bool
42
- pdf_reader: 'gpt' # str [llama, pymupdf, gpt]
43
 
44
  chat_logging:
45
  log_chat: True # bool
46
- platform: 'literalai'
47
  callbacks: True # bool
48
 
49
  splitter_options:
50
  use_splitter: True # bool
51
- split_by_token : True # bool
52
  remove_leftover_delimiters: True # bool
53
  remove_chunks: False # bool
54
- chunking_mode: 'semantic' # str [fixed, semantic]
55
- chunk_size : 300 # int
56
- chunk_overlap : 30 # int
57
- chunk_separators : ["\n\n", "\n", " ", ""] # list of strings
58
- front_chunks_to_remove : null # int or None
59
- last_chunks_to_remove : null # int or None
60
- delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
 
1
+ log_dir: "storage/logs" # str
2
+ log_chunk_dir: "storage/logs/chunks" # str
3
+ device: "cpu" # str [cuda, cpu]
4
 
5
  vectorstore:
6
+ load_from_HF: False # bool
7
  reparse_files: True # bool
8
+ data_path: "storage/data" # str
9
+ url_file_path: "storage/data/urls.txt" # str
10
  expand_urls: True # bool
11
+ db_option: "FAISS" # str [FAISS, Chroma, RAGatouille, RAPTOR]
12
+ db_path: "vectorstores" # str
13
+ model: "sentence-transformers/all-MiniLM-L6-v2" # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002]
14
+ search_top_k: 5 # int
15
+ score_threshold: 0.2 # float
16
 
17
  faiss_params: # Not used as of now
18
+ index_path: "vectorstores/faiss.index" # str
19
+ index_type: "Flat" # str [Flat, HNSW, IVF]
20
  index_dimension: 384 # int
21
  index_nlist: 100 # int
22
  index_nprobe: 10 # int
 
24
  colbert_params:
25
  index_name: "new_idx" # str
26
 
27
+ llm_params:
28
+ llm_arch: "langchain" # [langchain]
29
  use_history: True # bool
30
  generate_follow_up: False # bool
31
  memory_window: 3 # int
32
+ llm_style: "Normal" # str [Normal, ELI5]
33
+ llm_loader: "gpt-4o-mini" # str [local_llm, gpt-3.5-turbo-1106, gpt-4, gpt-4o-mini]
34
  openai_params:
35
  temperature: 0.7 # float
36
  local_llm_params:
37
  temperature: 0.7 # float
38
+ repo_id: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" # HuggingFace repo id
39
+ filename: "tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Specific name of gguf file in the repo
40
+ model_path: "storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Path to the model file
41
  stream: False # bool
42
+ pdf_reader: "pymupdf" # str [llama, pymupdf, gpt]
43
 
44
  chat_logging:
45
  log_chat: True # bool
46
+ platform: "literalai"
47
  callbacks: True # bool
48
 
49
  splitter_options:
50
  use_splitter: True # bool
51
+ split_by_token: True # bool
52
  remove_leftover_delimiters: True # bool
53
  remove_chunks: False # bool
54
+ chunking_mode: "semantic" # str [fixed, semantic]
55
+ chunk_size: 1000 # int
56
+ chunk_overlap: 100 # int
57
+ chunk_separators: ["\n\n", "\n", " ", ""] # list of strings
58
+ front_chunks_to_remove: null # int or None
59
+ last_chunks_to_remove: null # int or None
60
+ delimiters_to_remove: ['\t', '\n', " ", " "] # list of strings
apps/ai_tutor/config/project_config.yml CHANGED
@@ -3,15 +3,55 @@ retriever:
3
  RAGatouille: "XThomasBU/Colbert_Index"
4
 
5
  metadata:
6
- metadata_links: ["https://dl4ds.github.io/sp2024/lectures/", "https://dl4ds.github.io/sp2024/schedule/"]
7
- slide_base_link: "https://dl4ds.github.io"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  token_config:
10
  cooldown_time: 60
11
  regen_time: 180
12
- tokens_left: 2000
13
  all_time_tokens_allocated: 1000000
14
 
 
 
 
 
 
15
  misc:
16
  github_repo: "https://github.com/edubotics-ai/edubot-core"
17
  docs_website: "https://dl4ds.github.io/dl4ds_tutor/"
 
3
  RAGatouille: "XThomasBU/Colbert_Index"
4
 
5
  metadata:
6
+ metadata_links:
7
+ [
8
+ "https://dl4ds.github.io/sp2025/",
9
+ "https://dl4ds.github.io/sp2025/schedule/",
10
+ ]
11
+ slide_base_link:
12
+ "https://dl4ds.github.io"
13
+
14
+ # Assignment base link is used to find the webpage where the assignment is described/posted
15
+ assignment_base_link: "https://tools4ds.github.io/fa2024/assignments/"
16
+
17
+ # Define content types - assignments, lectures, etc.
18
+ content_types:
19
+ - "lectures"
20
+ - "assignments"
21
+ - "discussion"
22
+ - "other"
23
+
24
+ # These need to be patterns from URLs that can be used to identify the type of content uniquely
25
+ lectures_pattern: "/lectures/"
26
+ assignments_pattern: "/assignments/"
27
+ discussion_pattern: "/discussion/"
28
+ project_pattern: "/project/"
29
+
30
+ # These are fields that can be extracted from the webpages of the course content
31
+ lecture_metadata_fields:
32
+ - "title"
33
+ - "tldr"
34
+ - "date"
35
+ - "lecture_recording"
36
+ - "suggested_readings"
37
+
38
+ assignment_metadata_fields:
39
+ - "title"
40
+ - "release_date"
41
+ - "due_date"
42
+ - "source_file"
43
 
44
  token_config:
45
  cooldown_time: 60
46
  regen_time: 180
47
+ tokens_left: 50000
48
  all_time_tokens_allocated: 1000000
49
 
50
+ content:
51
+ notebookheaders_to_split_on:
52
+ - ["##", "Section"]
53
+ - ["#", "Title"]
54
+
55
  misc:
56
  github_repo: "https://github.com/edubotics-ai/edubot-core"
57
  docs_website: "https://dl4ds.github.io/dl4ds_tutor/"
apps/ai_tutor/public/files/students_encrypted.json CHANGED
@@ -1 +1,21 @@
1
- {"0645db6f7b415e3b04a4fc327151c3c7bbcd25ec546ee0b3604957b571a79bc2": ["instructor", "bu"], "51ebf87ac51618300acfef8bfa9768fdee40e2d3f39cfb4ae8a76722ee336de4": ["admin", "instructor", "bu"], "7810b25bef84317130e2a59da978ee716bb96f6a8a9296c051b7ad4108aa8e6a": ["instructor", "bu"], "a95f36e2700c554639d3522834b47733f5ed1f05c5a43d04ac2575571dd43563": ["student", "bu"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "7810b25bef84317130e2a59da978ee716bb96f6a8a9296c051b7ad4108aa8e6a": [
3
+ "admin",
4
+ "student",
5
+ "bu"
6
+ ],
7
+ "0bf8b6cca820bd8628a31d8d44a7b94fcd6d058c9d5a0c52b7ffdf01ac5ce310": [
8
+ "student",
9
+ "bu"
10
+ ],
11
+ "0645db6f7b415e3b04a4fc327151c3c7bbcd25ec546ee0b3604957b571a79bc2": [
12
+ "admin",
13
+ "instructor",
14
+ "bu"
15
+ ],
16
+ "a95f36e2700c554639d3522834b47733f5ed1f05c5a43d04ac2575571dd43563": [
17
+ "admin",
18
+ "instructor",
19
+ "bu"
20
+ ]
21
+ }
apps/ai_tutor/storage/data/urls.txt CHANGED
@@ -1 +1 @@
1
- https://dl4ds.github.io/sp2024/
 
1
+ https://dl4ds.github.io/sp2025/