File size: 2,098 Bytes
ac3c353
 
f51bb92
 
 
e19e333
1e2550f
ac3c353
 
f2daaee
638bffe
ac3c353
6158da4
b83cc65
f0018f2
f51bb92
 
d697aa5
f51bb92
 
 
 
 
e5cd1d3
 
 
6158da4
e19e333
6d056d5
e19e333
db6b619
b409192
9b7a7cf
6158da4
e029e22
f2daaee
e029e22
d1afae8
 
3a1356f
e19e333
679cb58
f2daaee
 
e19e333
f2daaee
3a1356f
f51bb92
6158da4
 
 
 
 
679cb58
b83cc65
 
6158da4
 
 
902a706
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Global settings shared by the rest of this config.

# str: path of the log directory
log_dir: 'storage/logs'
# str: path of the chunk-log directory
log_chunk_dir: 'storage/logs/chunks'
# str: one of [cuda, cpu]
device: 'cpu'

# Vector store settings: where source data lives, which backend to use,
# and the retrieval parameters.
vectorstore:
  load_from_HF: true  # bool
  reparse_files: true  # bool
  data_path: 'storage/data'  # str
  url_file_path: 'storage/data/urls.txt'  # str
  expand_urls: true  # bool
  db_option: 'RAGatouille'  # str [FAISS, Chroma, RAGatouille, RAPTOR]
  db_path: 'vectorstores'  # str
  # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002]
  model: 'sentence-transformers/all-MiniLM-L6-v2'
  search_top_k: 3  # int
  score_threshold: 0.2  # float

  faiss_params:  # Not used as of now
    index_path: 'vectorstores/faiss.index'  # str
    index_type: 'Flat'  # str [Flat, HNSW, IVF]
    index_dimension: 384  # int
    index_nlist: 100  # int
    index_nprobe: 10  # int

  # NOTE(review): presumably only consulted when db_option is
  # 'RAGatouille' -- confirm against the loader.
  colbert_params:
    index_name: 'new_idx'  # str

# LLM settings: which model backend to load and how the chat chain behaves.
llm_params:
  llm_arch: 'langchain'  # str [langchain]
  use_history: true  # bool
  generate_follow_up: false  # bool
  memory_window: 3  # int
  llm_style: 'Normal'  # str [Normal, ELI5]
  # str [local_llm, gpt-3.5-turbo-1106, gpt-4, gpt-4o-mini]
  llm_loader: 'gpt-4o-mini'
  openai_params:
    temperature: 0.7  # float
  # NOTE(review): presumably only used when llm_loader is 'local_llm' -- confirm.
  local_llm_params:
    temperature: 0.7  # float
    repo_id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF'  # HuggingFace repo id
    # Specific name of gguf file in the repo
    filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf'
    # Path to the model file
    model_path: 'storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf'
  stream: false  # bool
  pdf_reader: 'gpt'  # str [llama, pymupdf, gpt]

# Chat logging settings.
chat_logging:
  log_chat: true  # bool
  platform: 'literalai'  # str
  callbacks: true  # bool

# Text splitting / chunking settings.
splitter_options:
  use_splitter: true  # bool
  split_by_token: true  # bool
  remove_leftover_delimiters: true  # bool
  remove_chunks: false  # bool
  chunking_mode: 'semantic'  # str [fixed, semantic]
  chunk_size: 300  # int
  chunk_overlap: 30  # int
  # Double-quoted, so "\n" here is an actual newline character.
  chunk_separators: ["\n\n", "\n", " ", ""]  # list of strings
  front_chunks_to_remove: null  # int or None
  last_chunks_to_remove: null  # int or None
  # NOTE(review): these are single-quoted, so '\t' and '\n' are the literal
  # two-character sequences backslash-t / backslash-n, NOT tab / newline.
  # That is fine if the consumer treats each entry as a regex pattern, but
  # wrong for plain substring replacement -- confirm against the consumer.
  delimiters_to_remove: ['\t', '\n', '   ', '  ']  # list of strings