Spaces:

dl4ds
/

sp25_tutor

Running

Farid Karimli committed 12 days ago

Commit

0f736a4

1 Parent(s): 96bddf1

Config change

Files changed (1) hide show

apps/ai_tutor/config/config.yml CHANGED Viewed

@@ -7,7 +7,7 @@ vectorstore:
   reparse_files: True # bool
   data_path: "storage/data" # str
   url_file_path: "storage/data/urls.txt" # str
-  expand_urls: True # bool
   db_option: "FAISS" # str [FAISS, Chroma, RAGatouille, RAPTOR]
   db_path: "vectorstores" # str
   model: "sentence-transformers/all-MiniLM-L6-v2" # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002']
@@ -38,7 +38,7 @@ llm_params:
     repo_id: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" # HuggingFace repo id
     filename: "tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Specific name of gguf file in the repo
     model_path: "storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Path to the model file
-  stream: False # bool
   pdf_reader: "pymupdf" # str [llama, pymupdf, gpt]
 chat_logging:
@@ -51,9 +51,9 @@ splitter_options:
   split_by_token: True # bool
   remove_leftover_delimiters: True # bool
   remove_chunks: False # bool
-  chunking_mode: "semantic" # str [fixed, semantic]
-  chunk_size: 1000 # int
-  chunk_overlap: 100 # int
   chunk_separators: ["\n\n", "\n", " ", ""] # list of strings
   front_chunks_to_remove: null # int or None
   last_chunks_to_remove: null # int or None

   reparse_files: True # bool
   data_path: "storage/data" # str
   url_file_path: "storage/data/urls.txt" # str
+  expand_urls: False # bool
   db_option: "FAISS" # str [FAISS, Chroma, RAGatouille, RAPTOR]
   db_path: "vectorstores" # str
   model: "sentence-transformers/all-MiniLM-L6-v2" # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002']
     repo_id: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" # HuggingFace repo id
     filename: "tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Specific name of gguf file in the repo
     model_path: "storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Path to the model file
+  stream: True # bool
   pdf_reader: "pymupdf" # str [llama, pymupdf, gpt]
 chat_logging:
   split_by_token: True # bool
   remove_leftover_delimiters: True # bool
   remove_chunks: False # bool
+  chunking_mode: "fixed" # str [fixed, semantic]
+  chunk_size: 500 # int
+  chunk_overlap: 50 # int
   chunk_separators: ["\n\n", "\n", " ", ""] # list of strings
   front_chunks_to_remove: null # int or None
   last_chunks_to_remove: null # int or None