File size: 2,098 Bytes
ac3c353 f51bb92 e19e333 1e2550f ac3c353 f2daaee 638bffe ac3c353 6158da4 b83cc65 f0018f2 f51bb92 d697aa5 f51bb92 e5cd1d3 6158da4 e19e333 6d056d5 e19e333 db6b619 b409192 9b7a7cf 6158da4 e029e22 f2daaee e029e22 d1afae8 3a1356f e19e333 679cb58 f2daaee e19e333 f2daaee 3a1356f f51bb92 6158da4 679cb58 b83cc65 6158da4 902a706 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# Top-level settings.
# The type notes are carried over from the original inline annotations; what
# each path is used for is not visible here — confirm against the consumer.

# str
log_dir: 'storage/logs'

# str
log_chunk_dir: 'storage/logs/chunks'

# str, one of [cuda, cpu]
device: 'cpu'
# Vector store settings.
# NOTE(review): nesting restored from a copy whose indentation was lost. The
# keys below are grouped under `vectorstore` because it is an empty section
# header followed by them; confirm that `faiss_params` and `colbert_params`
# are read as children of `vectorstore` by the loading code.
vectorstore:
  load_from_HF: true  # bool
  reparse_files: true  # bool
  data_path: 'storage/data'  # str
  url_file_path: 'storage/data/urls.txt'  # str
  expand_urls: true  # bool
  db_option: 'RAGatouille'  # str [FAISS, Chroma, RAGatouille, RAPTOR]
  db_path: 'vectorstores'  # str
  # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002]
  model: 'sentence-transformers/all-MiniLM-L6-v2'
  search_top_k: 3  # int
  score_threshold: 0.2  # float

  faiss_params:  # Not used as of now
    index_path: 'vectorstores/faiss.index'  # str
    index_type: 'Flat'  # str [Flat, HNSW, IVF]
    # int; presumably must equal the embedding size of `model`
    # (all-MiniLM-L6-v2 produces 384-dimensional vectors) — TODO confirm
    index_dimension: 384
    index_nlist: 100  # int
    index_nprobe: 10  # int

  colbert_params:
    index_name: "new_idx"  # str
# LLM settings.
# NOTE(review): nesting restored from a copy whose indentation was lost.
# `temperature` appears once per backend section; left flat, the two entries
# would be duplicate keys in one mapping and only the last would survive.
llm_params:
  llm_arch: 'langchain'  # [langchain]
  use_history: true  # bool
  generate_follow_up: false  # bool
  memory_window: 3  # int
  llm_style: 'Normal'  # str [Normal, ELI5]
  # str [local_llm, gpt-3.5-turbo-1106, gpt-4, gpt-4o-mini]
  llm_loader: 'gpt-4o-mini'

  openai_params:
    temperature: 0.7  # float

  local_llm_params:
    temperature: 0.7  # float
    # HuggingFace repo id
    repo_id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF'
    # Specific name of gguf file in the repo
    filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf'
    # Path to the model file
    model_path: 'storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf'

  # NOTE(review): `stream` and `pdf_reader` are placed directly under
  # `llm_params` here; the flattened copy does not show whether they belong
  # here, under `local_llm_params`, or at top level — verify against the code
  # that reads them.
  stream: false  # bool
  pdf_reader: 'gpt'  # str [llama, pymupdf, gpt]
# Chat logging settings.
# NOTE(review): nesting restored from a copy whose indentation was lost;
# confirm these three keys are read as children of `chat_logging`.
chat_logging:
  log_chat: true  # bool
  # str; 'literalai' is the only value visible here — other accepted values
  # are not shown
  platform: 'literalai'
  callbacks: true  # bool
# Text splitting / chunking settings.
# NOTE(review): nesting restored from a copy whose indentation was lost;
# confirm these keys are read as children of `splitter_options`.
splitter_options:
  use_splitter: true  # bool
  split_by_token: true  # bool
  remove_leftover_delimiters: true  # bool
  remove_chunks: false  # bool
  chunking_mode: 'semantic'  # str [fixed, semantic]
  chunk_size: 300  # int
  chunk_overlap: 30  # int
  # list of strings; double-quoted, so "\n" here is a real newline character
  chunk_separators: ["\n\n", "\n", " ", ""]
  front_chunks_to_remove: null  # int or None
  last_chunks_to_remove: null  # int or None
  # list of strings
  # NOTE(review): these entries are single-quoted, and YAML does not process
  # backslash escapes in single-quoted scalars, so '\t' and '\n' are the
  # two-character sequences backslash+t and backslash+n, not tab/newline. If
  # real control characters are intended, switch those entries to double
  # quotes. The last two entries are identical as written; they may originally
  # have been runs of spaces of different lengths — confirm.
  delimiters_to_remove: ['\t', '\n', ' ', ' ']
|