kisa-misa commited on
Commit
d05da49
·
verified ·
1 Parent(s): 8f3b3fb

End of training

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.8557692307692307
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.4043
36
- - Accuracy: 0.8558
37
 
38
  ## Model description
39
 
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.8653846153846154
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.3938
36
+ - Accuracy: 0.8654
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 17.77777777777778,
3
- "eval_accuracy": 0.8404255319148937,
4
- "eval_loss": 0.4538363516330719,
5
- "eval_runtime": 0.7115,
6
- "eval_samples_per_second": 132.115,
7
- "eval_steps_per_second": 4.216,
8
- "total_flos": 3.7037909187824026e+17,
9
- "train_loss": 0.427582456668218,
10
- "train_runtime": 256.3024,
11
- "train_samples_per_second": 65.313,
12
- "train_steps_per_second": 0.468
13
  }
 
1
  {
2
+ "epoch": 28.0,
3
+ "eval_accuracy": 0.8653846153846154,
4
+ "eval_loss": 0.39384937286376953,
5
+ "eval_runtime": 0.7571,
6
+ "eval_samples_per_second": 137.371,
7
+ "eval_steps_per_second": 5.283,
8
+ "total_flos": 6.51425759341314e+17,
9
+ "train_loss": 0.4042152370725359,
10
+ "train_runtime": 453.861,
11
+ "train_samples_per_second": 61.869,
12
+ "train_steps_per_second": 0.463
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 17.77777777777778,
3
- "eval_accuracy": 0.8404255319148937,
4
- "eval_loss": 0.4538363516330719,
5
- "eval_runtime": 0.7115,
6
- "eval_samples_per_second": 132.115,
7
- "eval_steps_per_second": 4.216
8
  }
 
1
  {
2
+ "epoch": 28.0,
3
+ "eval_accuracy": 0.8653846153846154,
4
+ "eval_loss": 0.39384937286376953,
5
+ "eval_runtime": 0.7571,
6
+ "eval_samples_per_second": 137.371,
7
+ "eval_steps_per_second": 5.283
8
  }
runs/Jun10_17-27-32_8bc7bf2b7b8a/events.out.tfevents.1718041189.8bc7bf2b7b8a.515.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:545a3de2161e6148771024518f458214ec1ef30ed3f1ab27dd7e273d718adf29
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 17.77777777777778,
3
- "total_flos": 3.7037909187824026e+17,
4
- "train_loss": 0.427582456668218,
5
- "train_runtime": 256.3024,
6
- "train_samples_per_second": 65.313,
7
- "train_steps_per_second": 0.468
8
  }
 
1
  {
2
+ "epoch": 28.0,
3
+ "total_flos": 6.51425759341314e+17,
4
+ "train_loss": 0.4042152370725359,
5
+ "train_runtime": 453.861,
6
+ "train_samples_per_second": 61.869,
7
+ "train_steps_per_second": 0.463
8
  }
trainer_state.json CHANGED
@@ -1,275 +1,440 @@
1
  {
2
- "best_metric": 0.8404255319148937,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-phones/checkpoint-81",
4
- "epoch": 17.77777777777778,
5
  "eval_steps": 500,
6
- "global_step": 120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.8888888888888888,
13
- "eval_accuracy": 0.6595744680851063,
14
- "eval_loss": 0.6158677935600281,
15
- "eval_runtime": 0.616,
16
- "eval_samples_per_second": 152.588,
17
- "eval_steps_per_second": 4.87,
18
- "step": 6
19
  },
20
  {
21
- "epoch": 1.4814814814814814,
22
- "grad_norm": 8.271566390991211,
23
- "learning_rate": 4.166666666666667e-05,
24
- "loss": 0.5542,
25
  "step": 10
26
  },
27
  {
28
- "epoch": 1.925925925925926,
29
- "eval_accuracy": 0.7127659574468085,
30
- "eval_loss": 0.5690343379974365,
31
- "eval_runtime": 0.5828,
32
- "eval_samples_per_second": 161.284,
33
- "eval_steps_per_second": 5.147,
34
- "step": 13
35
  },
36
  {
37
- "epoch": 2.962962962962963,
38
- "grad_norm": 9.217803001403809,
39
- "learning_rate": 4.62962962962963e-05,
40
- "loss": 0.5334,
41
  "step": 20
42
  },
43
  {
44
- "epoch": 2.962962962962963,
45
- "eval_accuracy": 0.7127659574468085,
46
- "eval_loss": 0.5524399280548096,
47
- "eval_runtime": 0.6208,
48
- "eval_samples_per_second": 151.413,
49
- "eval_steps_per_second": 4.832,
50
- "step": 20
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.7021276595744681,
55
- "eval_loss": 0.5923256874084473,
56
- "eval_runtime": 0.6139,
57
- "eval_samples_per_second": 153.118,
58
- "eval_steps_per_second": 4.887,
59
- "step": 27
60
- },
61
- {
62
- "epoch": 4.444444444444445,
63
- "grad_norm": 8.293265342712402,
64
- "learning_rate": 4.166666666666667e-05,
65
- "loss": 0.4658,
66
  "step": 30
67
  },
68
  {
69
- "epoch": 4.888888888888889,
70
- "eval_accuracy": 0.7659574468085106,
71
- "eval_loss": 0.4978613257408142,
72
- "eval_runtime": 0.7172,
73
- "eval_samples_per_second": 131.065,
74
- "eval_steps_per_second": 4.183,
75
- "step": 33
76
  },
77
  {
78
- "epoch": 5.925925925925926,
79
- "grad_norm": 7.5387701988220215,
80
- "learning_rate": 3.7037037037037037e-05,
81
- "loss": 0.469,
82
- "step": 40
 
 
83
  },
84
  {
85
- "epoch": 5.925925925925926,
86
- "eval_accuracy": 0.776595744680851,
87
- "eval_loss": 0.4834465980529785,
88
- "eval_runtime": 0.6072,
89
- "eval_samples_per_second": 154.816,
90
- "eval_steps_per_second": 4.941,
91
  "step": 40
92
  },
93
  {
94
- "epoch": 6.962962962962963,
95
- "eval_accuracy": 0.7446808510638298,
96
- "eval_loss": 0.5267057418823242,
97
- "eval_runtime": 0.604,
98
- "eval_samples_per_second": 155.621,
99
- "eval_steps_per_second": 4.967,
100
- "step": 47
101
  },
102
  {
103
- "epoch": 7.407407407407407,
104
- "grad_norm": 7.728250026702881,
105
- "learning_rate": 3.240740740740741e-05,
106
- "loss": 0.4469,
107
  "step": 50
108
  },
 
 
 
 
 
 
 
 
 
109
  {
110
  "epoch": 8.0,
111
- "eval_accuracy": 0.723404255319149,
112
- "eval_loss": 0.5309192538261414,
113
- "eval_runtime": 0.6111,
114
- "eval_samples_per_second": 153.821,
115
- "eval_steps_per_second": 4.909,
116
- "step": 54
117
- },
118
- {
119
- "epoch": 8.88888888888889,
120
- "grad_norm": 12.034634590148926,
121
- "learning_rate": 2.777777777777778e-05,
122
- "loss": 0.436,
123
  "step": 60
124
  },
125
  {
126
- "epoch": 8.88888888888889,
127
- "eval_accuracy": 0.7872340425531915,
128
- "eval_loss": 0.4591914117336273,
129
- "eval_runtime": 0.6041,
130
- "eval_samples_per_second": 155.611,
131
- "eval_steps_per_second": 4.966,
132
  "step": 60
133
  },
134
  {
135
- "epoch": 9.925925925925926,
136
- "eval_accuracy": 0.8191489361702128,
137
- "eval_loss": 0.4601520001888275,
138
- "eval_runtime": 0.6315,
139
- "eval_samples_per_second": 148.848,
140
- "eval_steps_per_second": 4.75,
141
  "step": 67
142
  },
143
  {
144
- "epoch": 10.37037037037037,
145
- "grad_norm": 8.522727012634277,
146
- "learning_rate": 2.314814814814815e-05,
147
- "loss": 0.3978,
148
  "step": 70
149
  },
150
  {
151
- "epoch": 10.962962962962964,
152
- "eval_accuracy": 0.8085106382978723,
153
- "eval_loss": 0.479958176612854,
154
- "eval_runtime": 0.7165,
155
- "eval_samples_per_second": 131.191,
156
- "eval_steps_per_second": 4.187,
157
- "step": 74
158
  },
159
  {
160
- "epoch": 11.851851851851851,
161
- "grad_norm": 8.708232879638672,
162
- "learning_rate": 1.8518518518518518e-05,
163
- "loss": 0.3916,
164
  "step": 80
165
  },
166
  {
167
- "epoch": 12.0,
168
- "eval_accuracy": 0.8404255319148937,
169
- "eval_loss": 0.4538363516330719,
170
- "eval_runtime": 0.7058,
171
- "eval_samples_per_second": 133.179,
172
- "eval_steps_per_second": 4.25,
173
- "step": 81
174
  },
175
  {
176
- "epoch": 12.88888888888889,
177
- "eval_accuracy": 0.7872340425531915,
178
- "eval_loss": 0.4807003438472748,
179
- "eval_runtime": 0.7274,
180
- "eval_samples_per_second": 129.222,
181
- "eval_steps_per_second": 4.124,
182
- "step": 87
183
  },
184
  {
185
- "epoch": 13.333333333333334,
186
- "grad_norm": 10.32046127319336,
187
- "learning_rate": 1.388888888888889e-05,
188
- "loss": 0.3932,
 
 
189
  "step": 90
190
  },
191
  {
192
- "epoch": 13.925925925925926,
193
- "eval_accuracy": 0.8297872340425532,
194
- "eval_loss": 0.4472525715827942,
195
- "eval_runtime": 0.6791,
196
- "eval_samples_per_second": 138.408,
197
- "eval_steps_per_second": 4.417,
198
- "step": 94
199
  },
200
  {
201
- "epoch": 14.814814814814815,
202
- "grad_norm": 7.876950263977051,
203
- "learning_rate": 9.259259259259259e-06,
204
- "loss": 0.3362,
205
  "step": 100
206
  },
207
  {
208
- "epoch": 14.962962962962964,
209
- "eval_accuracy": 0.8085106382978723,
210
- "eval_loss": 0.4526437520980835,
211
- "eval_runtime": 0.6231,
212
- "eval_samples_per_second": 150.861,
213
- "eval_steps_per_second": 4.815,
214
- "step": 101
215
  },
216
  {
217
- "epoch": 16.0,
218
- "eval_accuracy": 0.8191489361702128,
219
- "eval_loss": 0.4435100257396698,
220
- "eval_runtime": 0.6246,
221
- "eval_samples_per_second": 150.502,
222
- "eval_steps_per_second": 4.803,
223
- "step": 108
224
- },
225
- {
226
- "epoch": 16.296296296296298,
227
- "grad_norm": 7.970451354980469,
228
- "learning_rate": 4.6296296296296296e-06,
229
- "loss": 0.3631,
230
  "step": 110
231
  },
232
  {
233
- "epoch": 16.88888888888889,
234
- "eval_accuracy": 0.8297872340425532,
235
- "eval_loss": 0.4465709328651428,
236
- "eval_runtime": 0.6157,
237
- "eval_samples_per_second": 152.667,
238
- "eval_steps_per_second": 4.872,
239
- "step": 114
240
  },
241
  {
242
- "epoch": 17.77777777777778,
243
- "grad_norm": 7.780444622039795,
244
- "learning_rate": 0.0,
245
- "loss": 0.3441,
246
  "step": 120
247
  },
248
  {
249
- "epoch": 17.77777777777778,
250
- "eval_accuracy": 0.8297872340425532,
251
- "eval_loss": 0.45035284757614136,
252
- "eval_runtime": 0.6091,
253
- "eval_samples_per_second": 154.338,
254
- "eval_steps_per_second": 4.926,
255
  "step": 120
256
  },
257
  {
258
- "epoch": 17.77777777777778,
259
- "step": 120,
260
- "total_flos": 3.7037909187824026e+17,
261
- "train_loss": 0.427582456668218,
262
- "train_runtime": 256.3024,
263
- "train_samples_per_second": 65.313,
264
- "train_steps_per_second": 0.468
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  }
266
  ],
267
  "logging_steps": 10,
268
- "max_steps": 120,
269
  "num_input_tokens_seen": 0,
270
- "num_train_epochs": 20,
271
  "save_steps": 500,
272
- "total_flos": 3.7037909187824026e+17,
 
 
 
 
 
 
 
 
 
 
 
 
273
  "train_batch_size": 32,
274
  "trial_name": null,
275
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8653846153846154,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-phones/checkpoint-135",
4
+ "epoch": 28.0,
5
  "eval_steps": 500,
6
+ "global_step": 210,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.9333333333333333,
13
+ "eval_accuracy": 0.5673076923076923,
14
+ "eval_loss": 0.6742563247680664,
15
+ "eval_runtime": 1.0496,
16
+ "eval_samples_per_second": 99.088,
17
+ "eval_steps_per_second": 3.811,
18
+ "step": 7
19
  },
20
  {
21
+ "epoch": 1.3333333333333333,
22
+ "grad_norm": 5.86643123626709,
23
+ "learning_rate": 2.380952380952381e-05,
24
+ "loss": 0.6763,
25
  "step": 10
26
  },
27
  {
28
+ "epoch": 2.0,
29
+ "eval_accuracy": 0.6923076923076923,
30
+ "eval_loss": 0.6165803670883179,
31
+ "eval_runtime": 0.6494,
32
+ "eval_samples_per_second": 160.146,
33
+ "eval_steps_per_second": 6.159,
34
+ "step": 15
35
  },
36
  {
37
+ "epoch": 2.6666666666666665,
38
+ "grad_norm": 6.109733581542969,
39
+ "learning_rate": 4.761904761904762e-05,
40
+ "loss": 0.635,
41
  "step": 20
42
  },
43
  {
44
+ "epoch": 2.9333333333333336,
45
+ "eval_accuracy": 0.7403846153846154,
46
+ "eval_loss": 0.5646082758903503,
47
+ "eval_runtime": 0.7406,
48
+ "eval_samples_per_second": 140.428,
49
+ "eval_steps_per_second": 5.401,
50
+ "step": 22
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "grad_norm": 14.055779457092285,
55
+ "learning_rate": 4.761904761904762e-05,
56
+ "loss": 0.5724,
 
 
 
 
 
 
 
 
 
57
  "step": 30
58
  },
59
  {
60
+ "epoch": 4.0,
61
+ "eval_accuracy": 0.7307692307692307,
62
+ "eval_loss": 0.5073935985565186,
63
+ "eval_runtime": 0.6701,
64
+ "eval_samples_per_second": 155.19,
65
+ "eval_steps_per_second": 5.969,
66
+ "step": 30
67
  },
68
  {
69
+ "epoch": 4.933333333333334,
70
+ "eval_accuracy": 0.7692307692307693,
71
+ "eval_loss": 0.48087915778160095,
72
+ "eval_runtime": 0.6729,
73
+ "eval_samples_per_second": 154.555,
74
+ "eval_steps_per_second": 5.944,
75
+ "step": 37
76
  },
77
  {
78
+ "epoch": 5.333333333333333,
79
+ "grad_norm": 8.836523056030273,
80
+ "learning_rate": 4.4973544973544974e-05,
81
+ "loss": 0.527,
 
 
82
  "step": 40
83
  },
84
  {
85
+ "epoch": 6.0,
86
+ "eval_accuracy": 0.7692307692307693,
87
+ "eval_loss": 0.45965665578842163,
88
+ "eval_runtime": 0.6926,
89
+ "eval_samples_per_second": 150.153,
90
+ "eval_steps_per_second": 5.775,
91
+ "step": 45
92
  },
93
  {
94
+ "epoch": 6.666666666666667,
95
+ "grad_norm": 6.653749465942383,
96
+ "learning_rate": 4.232804232804233e-05,
97
+ "loss": 0.5304,
98
  "step": 50
99
  },
100
+ {
101
+ "epoch": 6.933333333333334,
102
+ "eval_accuracy": 0.7596153846153846,
103
+ "eval_loss": 0.47583022713661194,
104
+ "eval_runtime": 0.681,
105
+ "eval_samples_per_second": 152.716,
106
+ "eval_steps_per_second": 5.874,
107
+ "step": 52
108
+ },
109
  {
110
  "epoch": 8.0,
111
+ "grad_norm": 13.230646133422852,
112
+ "learning_rate": 3.968253968253968e-05,
113
+ "loss": 0.4597,
 
 
 
 
 
 
 
 
 
114
  "step": 60
115
  },
116
  {
117
+ "epoch": 8.0,
118
+ "eval_accuracy": 0.7884615384615384,
119
+ "eval_loss": 0.43429186940193176,
120
+ "eval_runtime": 0.7692,
121
+ "eval_samples_per_second": 135.213,
122
+ "eval_steps_per_second": 5.2,
123
  "step": 60
124
  },
125
  {
126
+ "epoch": 8.933333333333334,
127
+ "eval_accuracy": 0.7980769230769231,
128
+ "eval_loss": 0.42488300800323486,
129
+ "eval_runtime": 0.6816,
130
+ "eval_samples_per_second": 152.578,
131
+ "eval_steps_per_second": 5.868,
132
  "step": 67
133
  },
134
  {
135
+ "epoch": 9.333333333333334,
136
+ "grad_norm": 8.930418968200684,
137
+ "learning_rate": 3.7037037037037037e-05,
138
+ "loss": 0.4606,
139
  "step": 70
140
  },
141
  {
142
+ "epoch": 10.0,
143
+ "eval_accuracy": 0.7980769230769231,
144
+ "eval_loss": 0.42358094453811646,
145
+ "eval_runtime": 0.6708,
146
+ "eval_samples_per_second": 155.044,
147
+ "eval_steps_per_second": 5.963,
148
+ "step": 75
149
  },
150
  {
151
+ "epoch": 10.666666666666666,
152
+ "grad_norm": 10.609027862548828,
153
+ "learning_rate": 3.439153439153439e-05,
154
+ "loss": 0.4286,
155
  "step": 80
156
  },
157
  {
158
+ "epoch": 10.933333333333334,
159
+ "eval_accuracy": 0.8461538461538461,
160
+ "eval_loss": 0.4054819643497467,
161
+ "eval_runtime": 0.6768,
162
+ "eval_samples_per_second": 153.664,
163
+ "eval_steps_per_second": 5.91,
164
+ "step": 82
165
  },
166
  {
167
+ "epoch": 12.0,
168
+ "grad_norm": 5.532287120819092,
169
+ "learning_rate": 3.1746031746031745e-05,
170
+ "loss": 0.3857,
171
+ "step": 90
 
 
172
  },
173
  {
174
+ "epoch": 12.0,
175
+ "eval_accuracy": 0.8269230769230769,
176
+ "eval_loss": 0.4144248962402344,
177
+ "eval_runtime": 0.7584,
178
+ "eval_samples_per_second": 137.132,
179
+ "eval_steps_per_second": 5.274,
180
  "step": 90
181
  },
182
  {
183
+ "epoch": 12.933333333333334,
184
+ "eval_accuracy": 0.7980769230769231,
185
+ "eval_loss": 0.4293949007987976,
186
+ "eval_runtime": 0.6743,
187
+ "eval_samples_per_second": 154.239,
188
+ "eval_steps_per_second": 5.932,
189
+ "step": 97
190
  },
191
  {
192
+ "epoch": 13.333333333333334,
193
+ "grad_norm": 11.760865211486816,
194
+ "learning_rate": 2.91005291005291e-05,
195
+ "loss": 0.3801,
196
  "step": 100
197
  },
198
  {
199
+ "epoch": 14.0,
200
+ "eval_accuracy": 0.8461538461538461,
201
+ "eval_loss": 0.40805691480636597,
202
+ "eval_runtime": 0.6845,
203
+ "eval_samples_per_second": 151.929,
204
+ "eval_steps_per_second": 5.843,
205
+ "step": 105
206
  },
207
  {
208
+ "epoch": 14.666666666666666,
209
+ "grad_norm": 11.491521835327148,
210
+ "learning_rate": 2.6455026455026456e-05,
211
+ "loss": 0.3538,
 
 
 
 
 
 
 
 
 
212
  "step": 110
213
  },
214
  {
215
+ "epoch": 14.933333333333334,
216
+ "eval_accuracy": 0.8461538461538461,
217
+ "eval_loss": 0.4194793403148651,
218
+ "eval_runtime": 0.674,
219
+ "eval_samples_per_second": 154.298,
220
+ "eval_steps_per_second": 5.935,
221
+ "step": 112
222
  },
223
  {
224
+ "epoch": 16.0,
225
+ "grad_norm": 11.567109107971191,
226
+ "learning_rate": 2.380952380952381e-05,
227
+ "loss": 0.3585,
228
  "step": 120
229
  },
230
  {
231
+ "epoch": 16.0,
232
+ "eval_accuracy": 0.8557692307692307,
233
+ "eval_loss": 0.4068710207939148,
234
+ "eval_runtime": 0.7839,
235
+ "eval_samples_per_second": 132.667,
236
+ "eval_steps_per_second": 5.103,
237
  "step": 120
238
  },
239
  {
240
+ "epoch": 16.933333333333334,
241
+ "eval_accuracy": 0.8557692307692307,
242
+ "eval_loss": 0.3970623016357422,
243
+ "eval_runtime": 0.6669,
244
+ "eval_samples_per_second": 155.948,
245
+ "eval_steps_per_second": 5.998,
246
+ "step": 127
247
+ },
248
+ {
249
+ "epoch": 17.333333333333332,
250
+ "grad_norm": 12.216808319091797,
251
+ "learning_rate": 2.1164021164021164e-05,
252
+ "loss": 0.3258,
253
+ "step": 130
254
+ },
255
+ {
256
+ "epoch": 18.0,
257
+ "eval_accuracy": 0.8653846153846154,
258
+ "eval_loss": 0.39384937286376953,
259
+ "eval_runtime": 0.6899,
260
+ "eval_samples_per_second": 150.747,
261
+ "eval_steps_per_second": 5.798,
262
+ "step": 135
263
+ },
264
+ {
265
+ "epoch": 18.666666666666668,
266
+ "grad_norm": 15.223637580871582,
267
+ "learning_rate": 1.8518518518518518e-05,
268
+ "loss": 0.3288,
269
+ "step": 140
270
+ },
271
+ {
272
+ "epoch": 18.933333333333334,
273
+ "eval_accuracy": 0.8461538461538461,
274
+ "eval_loss": 0.396359384059906,
275
+ "eval_runtime": 0.6861,
276
+ "eval_samples_per_second": 151.574,
277
+ "eval_steps_per_second": 5.83,
278
+ "step": 142
279
+ },
280
+ {
281
+ "epoch": 20.0,
282
+ "grad_norm": 25.446683883666992,
283
+ "learning_rate": 1.5873015873015872e-05,
284
+ "loss": 0.3276,
285
+ "step": 150
286
+ },
287
+ {
288
+ "epoch": 20.0,
289
+ "eval_accuracy": 0.8557692307692307,
290
+ "eval_loss": 0.44233372807502747,
291
+ "eval_runtime": 0.6969,
292
+ "eval_samples_per_second": 149.226,
293
+ "eval_steps_per_second": 5.739,
294
+ "step": 150
295
+ },
296
+ {
297
+ "epoch": 20.933333333333334,
298
+ "eval_accuracy": 0.8365384615384616,
299
+ "eval_loss": 0.40670448541641235,
300
+ "eval_runtime": 0.7625,
301
+ "eval_samples_per_second": 136.392,
302
+ "eval_steps_per_second": 5.246,
303
+ "step": 157
304
+ },
305
+ {
306
+ "epoch": 21.333333333333332,
307
+ "grad_norm": 10.679701805114746,
308
+ "learning_rate": 1.3227513227513228e-05,
309
+ "loss": 0.317,
310
+ "step": 160
311
+ },
312
+ {
313
+ "epoch": 22.0,
314
+ "eval_accuracy": 0.8653846153846154,
315
+ "eval_loss": 0.4178958535194397,
316
+ "eval_runtime": 0.6933,
317
+ "eval_samples_per_second": 150.017,
318
+ "eval_steps_per_second": 5.77,
319
+ "step": 165
320
+ },
321
+ {
322
+ "epoch": 22.666666666666668,
323
+ "grad_norm": 12.451942443847656,
324
+ "learning_rate": 1.0582010582010582e-05,
325
+ "loss": 0.288,
326
+ "step": 170
327
+ },
328
+ {
329
+ "epoch": 22.933333333333334,
330
+ "eval_accuracy": 0.8557692307692307,
331
+ "eval_loss": 0.3881677985191345,
332
+ "eval_runtime": 0.6592,
333
+ "eval_samples_per_second": 157.766,
334
+ "eval_steps_per_second": 6.068,
335
+ "step": 172
336
+ },
337
+ {
338
+ "epoch": 24.0,
339
+ "grad_norm": 11.666213989257812,
340
+ "learning_rate": 7.936507936507936e-06,
341
+ "loss": 0.2735,
342
+ "step": 180
343
+ },
344
+ {
345
+ "epoch": 24.0,
346
+ "eval_accuracy": 0.8557692307692307,
347
+ "eval_loss": 0.42146697640419006,
348
+ "eval_runtime": 0.6742,
349
+ "eval_samples_per_second": 154.246,
350
+ "eval_steps_per_second": 5.933,
351
+ "step": 180
352
+ },
353
+ {
354
+ "epoch": 24.933333333333334,
355
+ "eval_accuracy": 0.8461538461538461,
356
+ "eval_loss": 0.3971670866012573,
357
+ "eval_runtime": 0.7934,
358
+ "eval_samples_per_second": 131.085,
359
+ "eval_steps_per_second": 5.042,
360
+ "step": 187
361
+ },
362
+ {
363
+ "epoch": 25.333333333333332,
364
+ "grad_norm": 7.033930778503418,
365
+ "learning_rate": 5.291005291005291e-06,
366
+ "loss": 0.2805,
367
+ "step": 190
368
+ },
369
+ {
370
+ "epoch": 26.0,
371
+ "eval_accuracy": 0.8557692307692307,
372
+ "eval_loss": 0.3943334221839905,
373
+ "eval_runtime": 0.6986,
374
+ "eval_samples_per_second": 148.874,
375
+ "eval_steps_per_second": 5.726,
376
+ "step": 195
377
+ },
378
+ {
379
+ "epoch": 26.666666666666668,
380
+ "grad_norm": 5.698335647583008,
381
+ "learning_rate": 2.6455026455026455e-06,
382
+ "loss": 0.2961,
383
+ "step": 200
384
+ },
385
+ {
386
+ "epoch": 26.933333333333334,
387
+ "eval_accuracy": 0.8557692307692307,
388
+ "eval_loss": 0.39985355734825134,
389
+ "eval_runtime": 0.691,
390
+ "eval_samples_per_second": 150.514,
391
+ "eval_steps_per_second": 5.789,
392
+ "step": 202
393
+ },
394
+ {
395
+ "epoch": 28.0,
396
+ "grad_norm": 9.678221702575684,
397
+ "learning_rate": 0.0,
398
+ "loss": 0.2832,
399
+ "step": 210
400
+ },
401
+ {
402
+ "epoch": 28.0,
403
+ "eval_accuracy": 0.8557692307692307,
404
+ "eval_loss": 0.4042527377605438,
405
+ "eval_runtime": 0.6919,
406
+ "eval_samples_per_second": 150.307,
407
+ "eval_steps_per_second": 5.781,
408
+ "step": 210
409
+ },
410
+ {
411
+ "epoch": 28.0,
412
+ "step": 210,
413
+ "total_flos": 6.51425759341314e+17,
414
+ "train_loss": 0.4042152370725359,
415
+ "train_runtime": 453.861,
416
+ "train_samples_per_second": 61.869,
417
+ "train_steps_per_second": 0.463
418
  }
419
  ],
420
  "logging_steps": 10,
421
+ "max_steps": 210,
422
  "num_input_tokens_seen": 0,
423
+ "num_train_epochs": 30,
424
  "save_steps": 500,
425
+ "stateful_callbacks": {
426
+ "TrainerControl": {
427
+ "args": {
428
+ "should_epoch_stop": false,
429
+ "should_evaluate": false,
430
+ "should_log": false,
431
+ "should_save": true,
432
+ "should_training_stop": true
433
+ },
434
+ "attributes": {}
435
+ }
436
+ },
437
+ "total_flos": 6.51425759341314e+17,
438
  "train_batch_size": 32,
439
  "trial_name": null,
440
  "trial_params": null