Delta-Vector committed on
Commit 9c49288 · verified · 1 Parent(s): a3ea25d

Update README.md

Files changed (1)
  1. README.md +0 -166
README.md CHANGED
@@ -1,166 +0,0 @@
- ---
- library_name: peft
- tags:
- - generated_from_trainer
- datasets:
- - NewEden/Orion-LIT
- base_model: NewEden_Phi4-PT-merged
- model-index:
- - name: phi4-ptv2-out-r1
-   results: []
- ---
-
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
- should probably proofread and complete it, then remove this comment. -->
-
- [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
- <details><summary>See axolotl config</summary>
-
- axolotl version: `0.6.0`
- ```yaml
- base_model: NewEden_Phi4-PT-merged
- model_type: AutoModelForCausalLM
- tokenizer_type: AutoTokenizer
-
- #hub_model_id: NewEden/Phi4-pretrain
- #hub_strategy: "all_checkpoints"
- #push_dataset_to_hub:
- #hf_use_auth_token: true
-
- plugins:
- - axolotl.integrations.liger.LigerPlugin
- liger_rope: true
- liger_rms_norm: true
- liger_swiglu: true
- liger_fused_linear_cross_entropy: true
-
- #plugins:
- # - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
-
- #cut_cross_entropy: true
-
- load_in_8bit: false
- load_in_4bit: false
- strict: false
-
- datasets:
- - path: NewEden/Orion-LIT
-   type: completion
-   field: text
- shuffle_merged_datasets: true
- dataset_prepared_path: prepared_data
- val_set_size: 0.0
- output_dir: ./phi4-ptv2-out-r1
-
- sequence_len: 16384
- sample_packing: true
- pad_to_sequence_len: true
-
- adapter: lora
- lora_model_dir:
- lora_r: 128
- lora_alpha: 16
- lora_dropout: 0.05
- lora_target_modules:
- - gate_proj
- - down_proj
- - up_proj
- - q_proj
- - v_proj
- - k_proj
- - o_proj
-
- lora_modules_to_save:
- - embed_tokens
- - lm_head
-
-
- wandb_project: mag-phi
- wandb_entity:
- wandb_watch:
- wandb_name: comp-v2-attempt-01
- wandb_log_model:
-
- gradient_accumulation_steps: 4
- micro_batch_size: 2
- num_epochs: 1
- optimizer: paged_ademamix_8bit
- lr_scheduler: cosine
- learning_rate: 0.00002
-
- train_on_inputs: false
- group_by_length: false
- bf16: auto
- fp16:
- tf32: false
-
- gradient_checkpointing: unsloth
- early_stopping_patience:
- resume_from_checkpoint:
- local_rank:
- logging_steps: 1
- xformers_attention:
- flash_attention: true
-
- warmup_steps: 15
- evals_per_epoch: 4
- eval_table_size:
- eval_max_new_tokens: 128
- saves_per_epoch: 4
- debug:
- deepspeed: /workspace/axolotl/deepspeed_configs/zero3_bf16_cpuoffload_params.json
- weight_decay: 0.01
- fsdp:
- fsdp_config:
-
- ```
-
- </details><br>
-
- # phi4-ptv2-out-r1
-
- This model was trained from scratch on the NewEden/Orion-LIT dataset.
-
- ## Model description
-
- More information needed
-
- ## Intended uses & limitations
-
- More information needed
-
- ## Training and evaluation data
-
- More information needed
-
- ## Training procedure
-
- ### Training hyperparameters
-
- The following hyperparameters were used during training:
- - learning_rate: 2e-05
- - train_batch_size: 2
- - eval_batch_size: 2
- - seed: 42
- - distributed_type: multi-GPU
- - num_devices: 4
- - gradient_accumulation_steps: 4
- - total_train_batch_size: 32
- - total_eval_batch_size: 8
- - optimizer: Use OptimizerNames.PAGED_ADEMAMIX_8BIT and the args are:
-   No additional optimizer arguments
- - lr_scheduler_type: cosine
- - lr_scheduler_warmup_steps: 15
- - num_epochs: 1.0
-
- ### Training results
-
-
-
- ### Framework versions
-
- - PEFT 0.14.0
- - Transformers 4.48.1
- - Pytorch 2.5.1+cu124
- - Datasets 3.2.0
- - Tokenizers 0.21.0
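
The deleted card describes a PEFT LoRA adapter (rank 128, alpha 16) trained with Axolotl on top of `NewEden_Phi4-PT-merged`. As a rough sketch of how such an adapter could be loaded for inference with `peft` and `transformers`, assuming the `base_model` and `output_dir` paths from the config above are available locally (the paths are illustrative, not something this commit ships):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Assumed locations, taken from the deleted config: the merged base model
# (base_model) and the trained LoRA adapter directory (output_dir).
BASE_PATH = "NewEden_Phi4-PT-merged"
ADAPTER_PATH = "./phi4-ptv2-out-r1"

tokenizer = AutoTokenizer.from_pretrained(BASE_PATH)
base = AutoModelForCausalLM.from_pretrained(BASE_PATH, torch_dtype="auto")
model = PeftModel.from_pretrained(base, ADAPTER_PATH)  # attach the LoRA weights
model.eval()
```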