{
    "trainer": {
        "evaluation_strategy": "steps",
        "per_device_train_batch_size": 1,
        "per_device_eval_batch_size": 1,
        "gradient_accumulation_steps": 128,
        "eval_steps": 50,
        "save_steps": 50,
        "logging_steps": 2,
        "learning_rate": 0.0003,
        "num_train_epochs": 5,
        "lr_scheduler_type": "cosine",
        "warmup_steps": 50,
        "fp16": false,
        "bf16": true,
        "torch_compile": false,
        "optim": "adamw_torch"
    },
    "lora": {
        "r": 16,
        "lora_alpha": 16,
        "lora_dropout": 0.05,
        "bias": "none",
        "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"],
        "task_type": "CAUSAL_LM"
    },
    "load_in_8bit": true,
    "only_target_loss": true,
    "mode": "chat",
    "templates_path": "internal_prompts/saiga_v2.json",
    "model_name": "models/llama2-7b",
    "model_type": "causal",
    "max_tokens_count": 4096
}