File size: 1,748 Bytes
f51bb92
 
 
 
 
f2beb6a
 
ce9ef3e
f51bb92
 
f2daaee
e029e22
d697aa5
6158da4
b83cc65
f0018f2
f51bb92
 
d697aa5
f51bb92
 
 
 
 
e5cd1d3
 
 
6158da4
e029e22
6d056d5
db6b619
e029e22
 
6158da4
e029e22
f2daaee
e029e22
9d89b34
f2daaee
 
7f989d6
f2daaee
f51bb92
6158da4
 
 
 
 
b83cc65
 
6158da4
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Top-level settings. Both log paths are relative; how they are resolved
# depends on the code that loads this file.

# str
log_dir: '../storage/logs'

# str
log_chunk_dir: '../storage/logs/chunks'

# str, one of: cuda, cpu
device: 'cpu'

# Vector store settings. Type and allowed values are noted per key.
vectorstore:
  load_from_HF: true  # bool
  # NOTE(review): this path names a ColBERT index while db_option below is
  # FAISS - confirm the two are meant to be used together.
  HF_path: 'XThomasBU/Colbert_Index'  # str
  embedd_files: false  # bool
  data_path: '../storage/data'  # str
  url_file_path: '../storage/data/urls.txt'  # str
  expand_urls: true  # bool
  db_option: 'FAISS'  # str [FAISS, Chroma, RAGatouille, RAPTOR]
  db_path: 'vectorstores'  # str
  # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002]
  model: 'sentence-transformers/all-MiniLM-L6-v2'
  search_top_k: 3  # int
  score_threshold: 0.2  # float

  faiss_params:  # Not used as of now
    index_path: 'vectorstores/faiss.index'  # str
    index_type: 'Flat'  # str [Flat, HNSW, IVF]
    # presumably has to match the embedding size of `model` - TODO confirm
    index_dimension: 384  # int
    index_nlist: 100  # int
    index_nprobe: 10  # int

  colbert_params:
    index_name: 'new_idx'  # str

# LLM settings. Type and allowed values are noted per key.
llm_params:
  llm_arch: 'langchain'  # str [langchain, langgraph_agentic]
  use_history: true  # bool
  memory_window: 3  # int
  llm_style: 'Normal'  # str [Normal, ELI5, Socratic]
  llm_loader: 'gpt-3.5-turbo-1106'  # str [local_llm, gpt-3.5-turbo-1106, gpt-4]
  # Per-backend parameters; presumably only the mapping matching llm_loader
  # is read - verify against the consumer.
  openai_params:
    temperature: 0.7  # float
  local_llm_params:
    temperature: 0.7  # float
  stream: true  # bool

# Chat logging settings.
chat_logging:
  log_chat: false  # bool
  platform: 'literalai'  # str

# Text splitter settings. Type is noted per key.
splitter_options:
  use_splitter: true  # bool
  split_by_token: true  # bool
  remove_leftover_delimiters: true  # bool
  remove_chunks: false  # bool
  chunk_size: 300  # int
  chunk_overlap: 30  # int
  # Double-quoted on purpose: "\n" here is a real newline character.
  chunk_separators: ["\n\n", "\n", " ", ""]  # list of strings
  front_chunks_to_remove: null  # int or None
  last_chunks_to_remove: null  # int or None
  # Single-quoted: '\t' and '\n' are the two-character sequences backslash+t
  # and backslash+n, not a tab or a newline.
  # NOTE(review): correct only if the consumer treats these as regex
  # patterns; for plain string replacement they need double quotes - confirm.
  delimiters_to_remove: ['\t', '\n', '   ', '  ']  # list of strings