Bjarne12 commited on
Commit
b5133fb
·
verified ·
1 Parent(s): e00cc2f

Pushed the IDEFICS2 fine-tuned model.

Browse files
checkpoint-100/adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "HuggingFaceM4/idefics2-8b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": false,
8
+ "init_lora_weights": "gaussian",
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 8,
14
+ "lora_dropout": 0.1,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": ".*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$",
23
+ "task_type": null,
24
+ "use_dora": false,
25
+ "use_rslora": false
26
+ }
checkpoint-100/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d52a025c3e016a1a6f83c0d0f120ab1c44eb92397431ca9704595818797a92a6
3
+ size 93378688
checkpoint-100/generation_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bad_words_ids": [
4
+ [
5
+ 32000
6
+ ],
7
+ [
8
+ 32001
9
+ ]
10
+ ],
11
+ "bos_token_id": 1,
12
+ "eos_token_id": [
13
+ 2,
14
+ 32002
15
+ ],
16
+ "pad_token_id": 0,
17
+ "transformers_version": "4.45.0.dev0"
18
+ }
checkpoint-100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1d4734890024d792660992ee97cbd6fb8bce021dc43f9cf20bb5a41e04ad144
3
+ size 47425352
checkpoint-100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0424eae083a7fb9c5394e7eae70fbcc60b93df70f05fa6d0ab94a68545eed5c
3
+ size 14244
checkpoint-100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a60c7d771c1fd156acee762fba03c724cb41829a3f71df370ecd1d20b134982
3
+ size 1064
checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.6,
5
+ "eval_steps": 10,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 9.971570014953613,
14
+ "learning_rate": 0.00019,
15
+ "loss": 0.3706,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 4.22176456451416,
21
+ "learning_rate": 0.00018,
22
+ "loss": 0.3124,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.16,
27
+ "eval_loss": 0.3505728542804718,
28
+ "eval_runtime": 64.0226,
29
+ "eval_samples_per_second": 3.124,
30
+ "eval_steps_per_second": 1.562,
31
+ "step": 10
32
+ },
33
+ {
34
+ "epoch": 0.24,
35
+ "grad_norm": 3.4927077293395996,
36
+ "learning_rate": 0.00017,
37
+ "loss": 0.2459,
38
+ "step": 15
39
+ },
40
+ {
41
+ "epoch": 0.32,
42
+ "grad_norm": 5.083699703216553,
43
+ "learning_rate": 0.00016,
44
+ "loss": 0.2418,
45
+ "step": 20
46
+ },
47
+ {
48
+ "epoch": 0.32,
49
+ "eval_loss": 0.3516603112220764,
50
+ "eval_runtime": 63.9821,
51
+ "eval_samples_per_second": 3.126,
52
+ "eval_steps_per_second": 1.563,
53
+ "step": 20
54
+ },
55
+ {
56
+ "epoch": 0.4,
57
+ "grad_norm": 4.345269680023193,
58
+ "learning_rate": 0.00015000000000000001,
59
+ "loss": 0.2474,
60
+ "step": 25
61
+ },
62
+ {
63
+ "epoch": 0.48,
64
+ "grad_norm": 3.5892534255981445,
65
+ "learning_rate": 0.00014,
66
+ "loss": 0.2916,
67
+ "step": 30
68
+ },
69
+ {
70
+ "epoch": 0.48,
71
+ "eval_loss": 0.34584036469459534,
72
+ "eval_runtime": 64.416,
73
+ "eval_samples_per_second": 3.105,
74
+ "eval_steps_per_second": 1.552,
75
+ "step": 30
76
+ },
77
+ {
78
+ "epoch": 0.56,
79
+ "grad_norm": 3.6377668380737305,
80
+ "learning_rate": 0.00013000000000000002,
81
+ "loss": 0.2859,
82
+ "step": 35
83
+ },
84
+ {
85
+ "epoch": 0.64,
86
+ "grad_norm": 3.6360130310058594,
87
+ "learning_rate": 0.00012,
88
+ "loss": 0.2943,
89
+ "step": 40
90
+ },
91
+ {
92
+ "epoch": 0.64,
93
+ "eval_loss": 0.33658257126808167,
94
+ "eval_runtime": 64.2616,
95
+ "eval_samples_per_second": 3.112,
96
+ "eval_steps_per_second": 1.556,
97
+ "step": 40
98
+ },
99
+ {
100
+ "epoch": 0.72,
101
+ "grad_norm": 4.8437323570251465,
102
+ "learning_rate": 0.00011000000000000002,
103
+ "loss": 0.2664,
104
+ "step": 45
105
+ },
106
+ {
107
+ "epoch": 0.8,
108
+ "grad_norm": 2.5033814907073975,
109
+ "learning_rate": 0.0001,
110
+ "loss": 0.2793,
111
+ "step": 50
112
+ },
113
+ {
114
+ "epoch": 0.8,
115
+ "eval_loss": 0.34766554832458496,
116
+ "eval_runtime": 64.2152,
117
+ "eval_samples_per_second": 3.115,
118
+ "eval_steps_per_second": 1.557,
119
+ "step": 50
120
+ },
121
+ {
122
+ "epoch": 0.88,
123
+ "grad_norm": 3.4198801517486572,
124
+ "learning_rate": 9e-05,
125
+ "loss": 0.2661,
126
+ "step": 55
127
+ },
128
+ {
129
+ "epoch": 0.96,
130
+ "grad_norm": 3.3779234886169434,
131
+ "learning_rate": 8e-05,
132
+ "loss": 0.2596,
133
+ "step": 60
134
+ },
135
+ {
136
+ "epoch": 0.96,
137
+ "eval_loss": 0.32815971970558167,
138
+ "eval_runtime": 64.2353,
139
+ "eval_samples_per_second": 3.114,
140
+ "eval_steps_per_second": 1.557,
141
+ "step": 60
142
+ },
143
+ {
144
+ "epoch": 1.04,
145
+ "grad_norm": 1.620568037033081,
146
+ "learning_rate": 7e-05,
147
+ "loss": 0.2272,
148
+ "step": 65
149
+ },
150
+ {
151
+ "epoch": 1.12,
152
+ "grad_norm": 2.3302671909332275,
153
+ "learning_rate": 6e-05,
154
+ "loss": 0.1611,
155
+ "step": 70
156
+ },
157
+ {
158
+ "epoch": 1.12,
159
+ "eval_loss": 0.3327696919441223,
160
+ "eval_runtime": 64.397,
161
+ "eval_samples_per_second": 3.106,
162
+ "eval_steps_per_second": 1.553,
163
+ "step": 70
164
+ },
165
+ {
166
+ "epoch": 1.2,
167
+ "grad_norm": 2.511664867401123,
168
+ "learning_rate": 5e-05,
169
+ "loss": 0.1351,
170
+ "step": 75
171
+ },
172
+ {
173
+ "epoch": 1.28,
174
+ "grad_norm": 3.210343837738037,
175
+ "learning_rate": 4e-05,
176
+ "loss": 0.1436,
177
+ "step": 80
178
+ },
179
+ {
180
+ "epoch": 1.28,
181
+ "eval_loss": 0.3513491749763489,
182
+ "eval_runtime": 64.5251,
183
+ "eval_samples_per_second": 3.1,
184
+ "eval_steps_per_second": 1.55,
185
+ "step": 80
186
+ },
187
+ {
188
+ "epoch": 1.3599999999999999,
189
+ "grad_norm": 2.1352834701538086,
190
+ "learning_rate": 3e-05,
191
+ "loss": 0.1259,
192
+ "step": 85
193
+ },
194
+ {
195
+ "epoch": 1.44,
196
+ "grad_norm": 2.6668131351470947,
197
+ "learning_rate": 2e-05,
198
+ "loss": 0.1498,
199
+ "step": 90
200
+ },
201
+ {
202
+ "epoch": 1.44,
203
+ "eval_loss": 0.3499194383621216,
204
+ "eval_runtime": 63.9987,
205
+ "eval_samples_per_second": 3.125,
206
+ "eval_steps_per_second": 1.563,
207
+ "step": 90
208
+ },
209
+ {
210
+ "epoch": 1.52,
211
+ "grad_norm": 1.769700288772583,
212
+ "learning_rate": 1e-05,
213
+ "loss": 0.1438,
214
+ "step": 95
215
+ },
216
+ {
217
+ "epoch": 1.6,
218
+ "grad_norm": 3.0553314685821533,
219
+ "learning_rate": 0.0,
220
+ "loss": 0.1443,
221
+ "step": 100
222
+ },
223
+ {
224
+ "epoch": 1.6,
225
+ "eval_loss": 0.3474043011665344,
226
+ "eval_runtime": 64.2224,
227
+ "eval_samples_per_second": 3.114,
228
+ "eval_steps_per_second": 1.557,
229
+ "step": 100
230
+ }
231
+ ],
232
+ "logging_steps": 5,
233
+ "max_steps": 100,
234
+ "num_input_tokens_seen": 0,
235
+ "num_train_epochs": 2,
236
+ "save_steps": 25,
237
+ "stateful_callbacks": {
238
+ "TrainerControl": {
239
+ "args": {
240
+ "should_epoch_stop": false,
241
+ "should_evaluate": false,
242
+ "should_log": false,
243
+ "should_save": true,
244
+ "should_training_stop": true
245
+ },
246
+ "attributes": {}
247
+ }
248
+ },
249
+ "total_flos": 7451874544152192.0,
250
+ "train_batch_size": 2,
251
+ "trial_name": null,
252
+ "trial_params": null
253
+ }
checkpoint-100/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f1d4146a5ea439b2e4f958c5ddb4df798f34e280e44ea9240f130fa50216eca
3
+ size 5176
checkpoint-50/adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "HuggingFaceM4/idefics2-8b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": false,
8
+ "init_lora_weights": "gaussian",
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 8,
14
+ "lora_dropout": 0.1,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": ".*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$",
23
+ "task_type": null,
24
+ "use_dora": false,
25
+ "use_rslora": false
26
+ }
checkpoint-50/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8daec85916201f67a9dce0c1a33fa8dccb27b057c2ba1b02f232d0bbf270a483
3
+ size 93378688
checkpoint-50/generation_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bad_words_ids": [
4
+ [
5
+ 32000
6
+ ],
7
+ [
8
+ 32001
9
+ ]
10
+ ],
11
+ "bos_token_id": 1,
12
+ "eos_token_id": [
13
+ 2,
14
+ 32002
15
+ ],
16
+ "pad_token_id": 0,
17
+ "transformers_version": "4.45.0.dev0"
18
+ }
checkpoint-50/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1782824cfa0a5e19d72f42368bcb422a141414194b9e8360ad627550679f44a2
3
+ size 47425352
checkpoint-50/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19332abd72a44bc95260444b603faaf178d8e47dffbd1a67dc9f912654f4291b
3
+ size 14180
checkpoint-50/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24e7059497cfcc0d673af5d5ae14c3cd29f5adce0d3ef9e12e77aa7b6fdf93a0
3
+ size 1064
checkpoint-50/trainer_state.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.8,
5
+ "eval_steps": 10,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 9.971570014953613,
14
+ "learning_rate": 0.00019,
15
+ "loss": 0.3706,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 4.22176456451416,
21
+ "learning_rate": 0.00018,
22
+ "loss": 0.3124,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.16,
27
+ "eval_loss": 0.3505728542804718,
28
+ "eval_runtime": 64.0226,
29
+ "eval_samples_per_second": 3.124,
30
+ "eval_steps_per_second": 1.562,
31
+ "step": 10
32
+ },
33
+ {
34
+ "epoch": 0.24,
35
+ "grad_norm": 3.4927077293395996,
36
+ "learning_rate": 0.00017,
37
+ "loss": 0.2459,
38
+ "step": 15
39
+ },
40
+ {
41
+ "epoch": 0.32,
42
+ "grad_norm": 5.083699703216553,
43
+ "learning_rate": 0.00016,
44
+ "loss": 0.2418,
45
+ "step": 20
46
+ },
47
+ {
48
+ "epoch": 0.32,
49
+ "eval_loss": 0.3516603112220764,
50
+ "eval_runtime": 63.9821,
51
+ "eval_samples_per_second": 3.126,
52
+ "eval_steps_per_second": 1.563,
53
+ "step": 20
54
+ },
55
+ {
56
+ "epoch": 0.4,
57
+ "grad_norm": 4.345269680023193,
58
+ "learning_rate": 0.00015000000000000001,
59
+ "loss": 0.2474,
60
+ "step": 25
61
+ },
62
+ {
63
+ "epoch": 0.48,
64
+ "grad_norm": 3.5892534255981445,
65
+ "learning_rate": 0.00014,
66
+ "loss": 0.2916,
67
+ "step": 30
68
+ },
69
+ {
70
+ "epoch": 0.48,
71
+ "eval_loss": 0.34584036469459534,
72
+ "eval_runtime": 64.416,
73
+ "eval_samples_per_second": 3.105,
74
+ "eval_steps_per_second": 1.552,
75
+ "step": 30
76
+ },
77
+ {
78
+ "epoch": 0.56,
79
+ "grad_norm": 3.6377668380737305,
80
+ "learning_rate": 0.00013000000000000002,
81
+ "loss": 0.2859,
82
+ "step": 35
83
+ },
84
+ {
85
+ "epoch": 0.64,
86
+ "grad_norm": 3.6360130310058594,
87
+ "learning_rate": 0.00012,
88
+ "loss": 0.2943,
89
+ "step": 40
90
+ },
91
+ {
92
+ "epoch": 0.64,
93
+ "eval_loss": 0.33658257126808167,
94
+ "eval_runtime": 64.2616,
95
+ "eval_samples_per_second": 3.112,
96
+ "eval_steps_per_second": 1.556,
97
+ "step": 40
98
+ },
99
+ {
100
+ "epoch": 0.72,
101
+ "grad_norm": 4.8437323570251465,
102
+ "learning_rate": 0.00011000000000000002,
103
+ "loss": 0.2664,
104
+ "step": 45
105
+ },
106
+ {
107
+ "epoch": 0.8,
108
+ "grad_norm": 2.5033814907073975,
109
+ "learning_rate": 0.0001,
110
+ "loss": 0.2793,
111
+ "step": 50
112
+ },
113
+ {
114
+ "epoch": 0.8,
115
+ "eval_loss": 0.34766554832458496,
116
+ "eval_runtime": 64.2152,
117
+ "eval_samples_per_second": 3.115,
118
+ "eval_steps_per_second": 1.557,
119
+ "step": 50
120
+ }
121
+ ],
122
+ "logging_steps": 5,
123
+ "max_steps": 100,
124
+ "num_input_tokens_seen": 0,
125
+ "num_train_epochs": 2,
126
+ "save_steps": 25,
127
+ "stateful_callbacks": {
128
+ "TrainerControl": {
129
+ "args": {
130
+ "should_epoch_stop": false,
131
+ "should_evaluate": false,
132
+ "should_log": false,
133
+ "should_save": true,
134
+ "should_training_stop": false
135
+ },
136
+ "attributes": {}
137
+ }
138
+ },
139
+ "total_flos": 3720764707851840.0,
140
+ "train_batch_size": 2,
141
+ "trial_name": null,
142
+ "trial_params": null
143
+ }
checkpoint-50/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f1d4146a5ea439b2e4f958c5ddb4df798f34e280e44ea9240f130fa50216eca
3
+ size 5176
checkpoint-75/.ipynb_checkpoints/adapter_config-checkpoint.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "HuggingFaceM4/idefics2-8b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": false,
8
+ "init_lora_weights": "gaussian",
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 8,
14
+ "lora_dropout": 0.1,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": ".*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$",
23
+ "task_type": null,
24
+ "use_dora": false,
25
+ "use_rslora": false
26
+ }
checkpoint-75/.ipynb_checkpoints/generation_config-checkpoint.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bad_words_ids": [
4
+ [
5
+ 32000
6
+ ],
7
+ [
8
+ 32001
9
+ ]
10
+ ],
11
+ "bos_token_id": 1,
12
+ "eos_token_id": [
13
+ 2,
14
+ 32002
15
+ ],
16
+ "pad_token_id": 0,
17
+ "transformers_version": "4.45.0.dev0"
18
+ }
checkpoint-75/.ipynb_checkpoints/trainer_state-checkpoint.json ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.2,
5
+ "eval_steps": 10,
6
+ "global_step": 75,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 9.971570014953613,
14
+ "learning_rate": 0.00019,
15
+ "loss": 0.3706,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 4.22176456451416,
21
+ "learning_rate": 0.00018,
22
+ "loss": 0.3124,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.16,
27
+ "eval_loss": 0.3505728542804718,
28
+ "eval_runtime": 64.0226,
29
+ "eval_samples_per_second": 3.124,
30
+ "eval_steps_per_second": 1.562,
31
+ "step": 10
32
+ },
33
+ {
34
+ "epoch": 0.24,
35
+ "grad_norm": 3.4927077293395996,
36
+ "learning_rate": 0.00017,
37
+ "loss": 0.2459,
38
+ "step": 15
39
+ },
40
+ {
41
+ "epoch": 0.32,
42
+ "grad_norm": 5.083699703216553,
43
+ "learning_rate": 0.00016,
44
+ "loss": 0.2418,
45
+ "step": 20
46
+ },
47
+ {
48
+ "epoch": 0.32,
49
+ "eval_loss": 0.3516603112220764,
50
+ "eval_runtime": 63.9821,
51
+ "eval_samples_per_second": 3.126,
52
+ "eval_steps_per_second": 1.563,
53
+ "step": 20
54
+ },
55
+ {
56
+ "epoch": 0.4,
57
+ "grad_norm": 4.345269680023193,
58
+ "learning_rate": 0.00015000000000000001,
59
+ "loss": 0.2474,
60
+ "step": 25
61
+ },
62
+ {
63
+ "epoch": 0.48,
64
+ "grad_norm": 3.5892534255981445,
65
+ "learning_rate": 0.00014,
66
+ "loss": 0.2916,
67
+ "step": 30
68
+ },
69
+ {
70
+ "epoch": 0.48,
71
+ "eval_loss": 0.34584036469459534,
72
+ "eval_runtime": 64.416,
73
+ "eval_samples_per_second": 3.105,
74
+ "eval_steps_per_second": 1.552,
75
+ "step": 30
76
+ },
77
+ {
78
+ "epoch": 0.56,
79
+ "grad_norm": 3.6377668380737305,
80
+ "learning_rate": 0.00013000000000000002,
81
+ "loss": 0.2859,
82
+ "step": 35
83
+ },
84
+ {
85
+ "epoch": 0.64,
86
+ "grad_norm": 3.6360130310058594,
87
+ "learning_rate": 0.00012,
88
+ "loss": 0.2943,
89
+ "step": 40
90
+ },
91
+ {
92
+ "epoch": 0.64,
93
+ "eval_loss": 0.33658257126808167,
94
+ "eval_runtime": 64.2616,
95
+ "eval_samples_per_second": 3.112,
96
+ "eval_steps_per_second": 1.556,
97
+ "step": 40
98
+ },
99
+ {
100
+ "epoch": 0.72,
101
+ "grad_norm": 4.8437323570251465,
102
+ "learning_rate": 0.00011000000000000002,
103
+ "loss": 0.2664,
104
+ "step": 45
105
+ },
106
+ {
107
+ "epoch": 0.8,
108
+ "grad_norm": 2.5033814907073975,
109
+ "learning_rate": 0.0001,
110
+ "loss": 0.2793,
111
+ "step": 50
112
+ },
113
+ {
114
+ "epoch": 0.8,
115
+ "eval_loss": 0.34766554832458496,
116
+ "eval_runtime": 64.2152,
117
+ "eval_samples_per_second": 3.115,
118
+ "eval_steps_per_second": 1.557,
119
+ "step": 50
120
+ },
121
+ {
122
+ "epoch": 0.88,
123
+ "grad_norm": 3.4198801517486572,
124
+ "learning_rate": 9e-05,
125
+ "loss": 0.2661,
126
+ "step": 55
127
+ },
128
+ {
129
+ "epoch": 0.96,
130
+ "grad_norm": 3.3779234886169434,
131
+ "learning_rate": 8e-05,
132
+ "loss": 0.2596,
133
+ "step": 60
134
+ },
135
+ {
136
+ "epoch": 0.96,
137
+ "eval_loss": 0.32815971970558167,
138
+ "eval_runtime": 64.2353,
139
+ "eval_samples_per_second": 3.114,
140
+ "eval_steps_per_second": 1.557,
141
+ "step": 60
142
+ },
143
+ {
144
+ "epoch": 1.04,
145
+ "grad_norm": 1.620568037033081,
146
+ "learning_rate": 7e-05,
147
+ "loss": 0.2272,
148
+ "step": 65
149
+ },
150
+ {
151
+ "epoch": 1.12,
152
+ "grad_norm": 2.3302671909332275,
153
+ "learning_rate": 6e-05,
154
+ "loss": 0.1611,
155
+ "step": 70
156
+ },
157
+ {
158
+ "epoch": 1.12,
159
+ "eval_loss": 0.3327696919441223,
160
+ "eval_runtime": 64.397,
161
+ "eval_samples_per_second": 3.106,
162
+ "eval_steps_per_second": 1.553,
163
+ "step": 70
164
+ },
165
+ {
166
+ "epoch": 1.2,
167
+ "grad_norm": 2.511664867401123,
168
+ "learning_rate": 5e-05,
169
+ "loss": 0.1351,
170
+ "step": 75
171
+ }
172
+ ],
173
+ "logging_steps": 5,
174
+ "max_steps": 100,
175
+ "num_input_tokens_seen": 0,
176
+ "num_train_epochs": 2,
177
+ "save_steps": 25,
178
+ "stateful_callbacks": {
179
+ "TrainerControl": {
180
+ "args": {
181
+ "should_epoch_stop": false,
182
+ "should_evaluate": false,
183
+ "should_log": false,
184
+ "should_save": true,
185
+ "should_training_stop": false
186
+ },
187
+ "attributes": {}
188
+ }
189
+ },
190
+ "total_flos": 5587264613696832.0,
191
+ "train_batch_size": 2,
192
+ "trial_name": null,
193
+ "trial_params": null
194
+ }
checkpoint-75/adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "HuggingFaceM4/idefics2-8b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": false,
8
+ "init_lora_weights": "gaussian",
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 8,
14
+ "lora_dropout": 0.1,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": ".*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$",
23
+ "task_type": null,
24
+ "use_dora": false,
25
+ "use_rslora": false
26
+ }
checkpoint-75/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d38fe7f1c3332543a39a0e1da32d7985b84c3e0ac86e4638b61b11d088b5bd36
3
+ size 93378688
checkpoint-75/generation_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bad_words_ids": [
4
+ [
5
+ 32000
6
+ ],
7
+ [
8
+ 32001
9
+ ]
10
+ ],
11
+ "bos_token_id": 1,
12
+ "eos_token_id": [
13
+ 2,
14
+ 32002
15
+ ],
16
+ "pad_token_id": 0,
17
+ "transformers_version": "4.45.0.dev0"
18
+ }
checkpoint-75/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6860107712175eeacc615177130c68fcb6346ec09565071980e70b48990345d
3
+ size 47425352
checkpoint-75/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:028ef1ed118bb03768b6b48f4f298b5a8af0e6aae825d92a4c571f6e2cb13c61
3
+ size 14244
checkpoint-75/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b9eaa8bb669020271b7c910d2bdf193c654b43d7f59fb9d10de33c026f49712
3
+ size 1064
checkpoint-75/trainer_state.json ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.2,
5
+ "eval_steps": 10,
6
+ "global_step": 75,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 9.971570014953613,
14
+ "learning_rate": 0.00019,
15
+ "loss": 0.3706,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 4.22176456451416,
21
+ "learning_rate": 0.00018,
22
+ "loss": 0.3124,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.16,
27
+ "eval_loss": 0.3505728542804718,
28
+ "eval_runtime": 64.0226,
29
+ "eval_samples_per_second": 3.124,
30
+ "eval_steps_per_second": 1.562,
31
+ "step": 10
32
+ },
33
+ {
34
+ "epoch": 0.24,
35
+ "grad_norm": 3.4927077293395996,
36
+ "learning_rate": 0.00017,
37
+ "loss": 0.2459,
38
+ "step": 15
39
+ },
40
+ {
41
+ "epoch": 0.32,
42
+ "grad_norm": 5.083699703216553,
43
+ "learning_rate": 0.00016,
44
+ "loss": 0.2418,
45
+ "step": 20
46
+ },
47
+ {
48
+ "epoch": 0.32,
49
+ "eval_loss": 0.3516603112220764,
50
+ "eval_runtime": 63.9821,
51
+ "eval_samples_per_second": 3.126,
52
+ "eval_steps_per_second": 1.563,
53
+ "step": 20
54
+ },
55
+ {
56
+ "epoch": 0.4,
57
+ "grad_norm": 4.345269680023193,
58
+ "learning_rate": 0.00015000000000000001,
59
+ "loss": 0.2474,
60
+ "step": 25
61
+ },
62
+ {
63
+ "epoch": 0.48,
64
+ "grad_norm": 3.5892534255981445,
65
+ "learning_rate": 0.00014,
66
+ "loss": 0.2916,
67
+ "step": 30
68
+ },
69
+ {
70
+ "epoch": 0.48,
71
+ "eval_loss": 0.34584036469459534,
72
+ "eval_runtime": 64.416,
73
+ "eval_samples_per_second": 3.105,
74
+ "eval_steps_per_second": 1.552,
75
+ "step": 30
76
+ },
77
+ {
78
+ "epoch": 0.56,
79
+ "grad_norm": 3.6377668380737305,
80
+ "learning_rate": 0.00013000000000000002,
81
+ "loss": 0.2859,
82
+ "step": 35
83
+ },
84
+ {
85
+ "epoch": 0.64,
86
+ "grad_norm": 3.6360130310058594,
87
+ "learning_rate": 0.00012,
88
+ "loss": 0.2943,
89
+ "step": 40
90
+ },
91
+ {
92
+ "epoch": 0.64,
93
+ "eval_loss": 0.33658257126808167,
94
+ "eval_runtime": 64.2616,
95
+ "eval_samples_per_second": 3.112,
96
+ "eval_steps_per_second": 1.556,
97
+ "step": 40
98
+ },
99
+ {
100
+ "epoch": 0.72,
101
+ "grad_norm": 4.8437323570251465,
102
+ "learning_rate": 0.00011000000000000002,
103
+ "loss": 0.2664,
104
+ "step": 45
105
+ },
106
+ {
107
+ "epoch": 0.8,
108
+ "grad_norm": 2.5033814907073975,
109
+ "learning_rate": 0.0001,
110
+ "loss": 0.2793,
111
+ "step": 50
112
+ },
113
+ {
114
+ "epoch": 0.8,
115
+ "eval_loss": 0.34766554832458496,
116
+ "eval_runtime": 64.2152,
117
+ "eval_samples_per_second": 3.115,
118
+ "eval_steps_per_second": 1.557,
119
+ "step": 50
120
+ },
121
+ {
122
+ "epoch": 0.88,
123
+ "grad_norm": 3.4198801517486572,
124
+ "learning_rate": 9e-05,
125
+ "loss": 0.2661,
126
+ "step": 55
127
+ },
128
+ {
129
+ "epoch": 0.96,
130
+ "grad_norm": 3.3779234886169434,
131
+ "learning_rate": 8e-05,
132
+ "loss": 0.2596,
133
+ "step": 60
134
+ },
135
+ {
136
+ "epoch": 0.96,
137
+ "eval_loss": 0.32815971970558167,
138
+ "eval_runtime": 64.2353,
139
+ "eval_samples_per_second": 3.114,
140
+ "eval_steps_per_second": 1.557,
141
+ "step": 60
142
+ },
143
+ {
144
+ "epoch": 1.04,
145
+ "grad_norm": 1.620568037033081,
146
+ "learning_rate": 7e-05,
147
+ "loss": 0.2272,
148
+ "step": 65
149
+ },
150
+ {
151
+ "epoch": 1.12,
152
+ "grad_norm": 2.3302671909332275,
153
+ "learning_rate": 6e-05,
154
+ "loss": 0.1611,
155
+ "step": 70
156
+ },
157
+ {
158
+ "epoch": 1.12,
159
+ "eval_loss": 0.3327696919441223,
160
+ "eval_runtime": 64.397,
161
+ "eval_samples_per_second": 3.106,
162
+ "eval_steps_per_second": 1.553,
163
+ "step": 70
164
+ },
165
+ {
166
+ "epoch": 1.2,
167
+ "grad_norm": 2.511664867401123,
168
+ "learning_rate": 5e-05,
169
+ "loss": 0.1351,
170
+ "step": 75
171
+ }
172
+ ],
173
+ "logging_steps": 5,
174
+ "max_steps": 100,
175
+ "num_input_tokens_seen": 0,
176
+ "num_train_epochs": 2,
177
+ "save_steps": 25,
178
+ "stateful_callbacks": {
179
+ "TrainerControl": {
180
+ "args": {
181
+ "should_epoch_stop": false,
182
+ "should_evaluate": false,
183
+ "should_log": false,
184
+ "should_save": true,
185
+ "should_training_stop": false
186
+ },
187
+ "attributes": {}
188
+ }
189
+ },
190
+ "total_flos": 5587264613696832.0,
191
+ "train_batch_size": 2,
192
+ "trial_name": null,
193
+ "trial_params": null
194
+ }
checkpoint-75/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f1d4146a5ea439b2e4f958c5ddb4df798f34e280e44ea9240f130fa50216eca
3
+ size 5176