Crystalcareai committed on
Commit
f3a237e
1 Parent(s): 65a5134

Delete AxolotlConfig.txt

Browse files
Files changed (1) hide show
  1. AxolotlConfig.txt +0 -74
AxolotlConfig.txt DELETED
@@ -1,74 +0,0 @@
1
- base_model: Crystalcareai/Qwen-1.5-8x7B
2
- model_type: Qwen2ForCausalLM
3
- tokenizer_type: Qwen2Tokenizer
4
- trust_remote_code: true
5
-
6
-
7
- load_in_8bit: false
8
- load_in_4bit: true
9
- strict: false
10
-
11
-
12
- datasets:
13
- - path: Crystalcareai/MoD
14
- type: sharegpt
15
- dataset_prepared_path: last_run_prepared
16
- val_set_size: 0.0
17
- output_dir: ./qlora-out
18
-
19
-
20
- model_config:
21
- output_router_logits: true
22
-
23
-
24
- adapter: qlora
25
- lora_model_dir:
26
-
27
-
28
- sequence_len: 32768
29
- sample_packing: true
30
- pad_to_sequence_len: true
31
-
32
-
33
- lora_r: 32
34
- lora_alpha: 16
35
- lora_dropout: 0.05
36
- lora_target_linear: true
37
- lora_fan_in_fan_out:
38
-
39
-
40
- gradient_accumulation_steps: 2
41
- micro_batch_size: 2
42
- num_epochs: 4
43
- optimizer: adamw_bnb_8bit
44
- lr_scheduler: cosine
45
- learning_rate: 0.0002
46
-
47
-
48
- train_on_inputs: false
49
- group_by_length: false
50
- bf16: true
51
- fp16:
52
- tf32: false
53
-
54
-
55
- gradient_checkpointing: true
56
- early_stopping_patience:
57
- resume_from_checkpoint:
58
- local_rank:
59
- logging_steps: 1
60
- xformers_attention:
61
- flash_attention: true
62
-
63
-
64
- warmup_steps: 10
65
- evals_per_epoch: 4
66
- eval_table_size:
67
- eval_max_new_tokens: 128
68
- saves_per_epoch: 4
69
- debug:
70
- deepspeed: deepspeed_configs/zero2.json
71
- weight_decay: 0.0
72
- fsdp:
73
- fsdp_config:
74
- special_tokens: