Undi95 committed on
Commit
f59b33c
·
verified ·
1 Parent(s): 6200ea4

Upload axolotl_config_l6xu8_mg.yml

Browse files
Files changed (1) hide show
  1. axolotl_config_l6xu8_mg.yml +76 -0
axolotl_config_l6xu8_mg.yml ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: ./mistralai_Mistral-Small-24B-Base-2501/
2
+ # optionally might have model_type or tokenizer_type
3
+ model_type: AutoModelForCausalLM
4
+ tokenizer_type: AutoTokenizer
5
+ # Automatically upload checkpoint and final model to HF
6
+ # hub_model_id: username/custom_model_name
7
+
8
+ trust_remote_code: true
9
+
10
+ wandb_project: SmallMistral-Reflex2
11
+ wandb_entity:
12
+ wandb_watch:
13
+ wandb_name:
14
+ wandb_log_model:
15
+
16
+ load_in_8bit: false
17
+ load_in_4bit: false
18
+ strict: false
19
+
20
+ model_config:
21
+ output_router_logits: true
22
+
23
+ chat_template: tokenizer_default
24
+
25
+ datasets:
26
+ - path: Undi95/R1-RP-ShareGPT3
27
+ type: chat_template
28
+ chat_template: tokenizer_default
29
+ field_messages: conversations
30
+ message_field_role: from
31
+ message_field_content: value
32
+ roles:
33
+ user: ["human", "user"]
34
+ assistant: ["gpt", "assistant"]
35
+ system: ["system"]
36
+ tool: ["tool"]
37
+
38
+ dataset_prepared_path: last_run_prepared
39
+ shuffle_merged_datasets: true
40
+ val_set_size: 0.05
41
+ output_dir: ./out
42
+
43
+ sequence_len: 4096
44
+ sample_packing: true
45
+ pad_to_sequence_len: true
46
+
47
+ gradient_accumulation_steps: 1
48
+ micro_batch_size: 1
49
+ num_epochs: 2
50
+ optimizer: adamw_bnb_8bit
51
+ lr_scheduler: cosine
52
+ learning_rate: 0.0000025
53
+
54
+ train_on_inputs: false
55
+ group_by_length: false
56
+ bf16: auto
57
+ fp16:
58
+ tf32: false
59
+
60
+ gradient_checkpointing: unsloth
61
+ early_stopping_patience:
62
+ resume_from_checkpoint:
63
+ local_rank:
64
+ logging_steps: 1
65
+ xformers_attention:
66
+ flash_attention: true
67
+
68
+ save_total_limit: 6
69
+ saves_per_epoch: 2
70
+ save_steps:
71
+ debug:
72
+ deepspeed:
73
+ weight_decay: 0.0
74
+ fsdp:
75
+ fsdp_config:
76
+ save_safetensors: true