rttl committed on
Commit
a0ab59d
1 Parent(s): 18461c1

Upload 10 files

Browse files
Files changed (6) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +176 -104
  6. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0829508767711a32a7b3407530152c4015bb2a390b78449ed6868ddeb850132
3
  size 2681380253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7e9df408ca2438a9552a78c68d2e86f3adbe8740cc4804a95b7ca8f629c596f
3
  size 2681380253
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48347d8387bcde74d443730e5a4a3b241a4826d0ad844f07f91ec9f7414d7d58
3
  size 1340711725
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc0c0d6bac550fffbecbbb43f75c7e4426a1e156611b0ebaa47e933558d93888
3
  size 1340711725
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd9a9957442eed6a503ce4ce093fa8a50609104db077223260ae7f86b1a6609c
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:406b9393c993b3cb066618b85d5b99ae8ed0158a4135bd7e8ab5421523207788
3
  size 15523
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e60258e9108ffab60da59fa90b3efa9a2ff8ccdb3d81b236c9ad41f10148a7d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9be1c2e449227164add711a419ee9d822a66d379ed17b9fb1f1d4d3df3b9d8aa
3
  size 623
trainer_state.json CHANGED
@@ -2,175 +2,247 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 10.0,
5
- "global_step": 6070,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.82,
12
- "learning_rate": 1.8352553542009887e-05,
13
- "loss": 0.2073,
14
  "step": 500
15
  },
 
 
 
 
 
 
16
  {
17
  "epoch": 1.0,
18
- "eval_f1": 0.9750730374140043,
19
- "eval_loss": 0.08468761295080185,
20
- "eval_runtime": 237.9595,
21
- "eval_samples_per_second": 326.173,
22
- "eval_steps_per_second": 2.551,
23
- "step": 607
 
 
 
 
 
 
24
  },
25
  {
26
  "epoch": 1.65,
27
- "learning_rate": 1.6705107084019773e-05,
28
- "loss": 0.1121,
29
- "step": 1000
30
  },
31
  {
32
  "epoch": 2.0,
33
- "eval_f1": 0.986432880313593,
34
- "eval_loss": 0.04612227529287338,
35
- "eval_runtime": 237.9341,
36
- "eval_samples_per_second": 326.208,
37
- "eval_steps_per_second": 2.551,
38
- "step": 1214
 
 
 
 
 
 
39
  },
40
  {
41
  "epoch": 2.47,
42
- "learning_rate": 1.5057660626029655e-05,
43
- "loss": 0.0823,
44
- "step": 1500
 
 
 
 
 
 
45
  },
46
  {
47
  "epoch": 3.0,
48
- "eval_f1": 0.9916420153073202,
49
- "eval_loss": 0.02950172871351242,
50
- "eval_runtime": 237.7454,
51
- "eval_samples_per_second": 326.467,
52
- "eval_steps_per_second": 2.553,
53
- "step": 1821
54
  },
55
  {
56
- "epoch": 3.29,
57
- "learning_rate": 1.341021416803954e-05,
58
- "loss": 0.0593,
59
- "step": 2000
 
 
 
 
 
 
60
  },
61
  {
62
  "epoch": 4.0,
63
- "eval_f1": 0.9947999625222524,
64
- "eval_loss": 0.01778862066566944,
65
- "eval_runtime": 237.6987,
66
- "eval_samples_per_second": 326.531,
67
- "eval_steps_per_second": 2.554,
68
- "step": 2428
69
  },
70
  {
71
  "epoch": 4.12,
72
- "learning_rate": 1.1762767710049425e-05,
73
- "loss": 0.0422,
74
- "step": 2500
75
  },
76
  {
77
- "epoch": 4.94,
78
- "learning_rate": 1.011532125205931e-05,
79
- "loss": 0.03,
80
- "step": 3000
 
 
 
 
 
 
81
  },
82
  {
83
  "epoch": 5.0,
84
- "eval_f1": 0.9961641778789194,
85
- "eval_loss": 0.012077880091965199,
86
- "eval_runtime": 237.5463,
87
- "eval_samples_per_second": 326.741,
88
- "eval_steps_per_second": 2.555,
89
- "step": 3035
 
 
 
 
 
 
90
  },
91
  {
92
  "epoch": 5.77,
93
- "learning_rate": 8.467874794069193e-06,
94
- "loss": 0.0215,
95
- "step": 3500
96
  },
97
  {
98
  "epoch": 6.0,
99
- "eval_f1": 0.9972702243570969,
100
- "eval_loss": 0.008092650212347507,
101
- "eval_runtime": 237.646,
102
- "eval_samples_per_second": 326.603,
103
- "eval_steps_per_second": 2.554,
104
- "step": 3642
105
  },
106
  {
107
- "epoch": 6.59,
108
- "learning_rate": 6.8204283360790776e-06,
109
- "loss": 0.0159,
110
- "step": 4000
 
 
 
 
 
 
111
  },
112
  {
113
  "epoch": 7.0,
114
- "eval_f1": 0.997763649349585,
115
- "eval_loss": 0.006471516564488411,
116
- "eval_runtime": 237.5957,
117
- "eval_samples_per_second": 326.673,
118
- "eval_steps_per_second": 2.555,
119
- "step": 4249
120
  },
121
  {
122
- "epoch": 7.41,
123
- "learning_rate": 5.172981878088963e-06,
124
- "loss": 0.0141,
125
- "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
126
  },
127
  {
128
  "epoch": 8.0,
129
- "eval_f1": 0.9986060513769636,
130
- "eval_loss": 0.0039009908214211464,
131
- "eval_runtime": 237.631,
132
- "eval_samples_per_second": 326.624,
133
- "eval_steps_per_second": 2.554,
134
- "step": 4856
135
  },
136
  {
137
  "epoch": 8.24,
138
- "learning_rate": 3.525535420098847e-06,
139
- "loss": 0.0089,
140
- "step": 5000
 
 
 
 
 
 
141
  },
142
  {
143
  "epoch": 9.0,
144
- "eval_f1": 0.9990627269958058,
145
- "eval_loss": 0.0030426010489463806,
146
- "eval_runtime": 237.6288,
147
- "eval_samples_per_second": 326.627,
148
- "eval_steps_per_second": 2.554,
149
- "step": 5463
150
  },
151
  {
152
- "epoch": 9.06,
153
- "learning_rate": 1.8780889621087318e-06,
154
- "loss": 0.0079,
155
- "step": 5500
156
  },
157
  {
158
- "epoch": 9.88,
159
- "learning_rate": 2.3064250411861617e-07,
160
- "loss": 0.0057,
161
- "step": 6000
 
 
 
 
 
 
162
  },
163
  {
164
  "epoch": 10.0,
165
- "eval_f1": 0.9991682773970597,
166
- "eval_loss": 0.002530897269025445,
167
- "eval_runtime": 237.7374,
168
- "eval_samples_per_second": 326.478,
169
- "eval_steps_per_second": 2.553,
170
- "step": 6070
171
  }
172
  ],
173
- "max_steps": 6070,
174
  "num_train_epochs": 10,
175
  "total_flos": 1.8083196174016512e+17,
176
  "trial_name": null,
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 10.0,
5
+ "global_step": 12130,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.41,
12
+ "learning_rate": 1.9175597691673537e-05,
13
+ "loss": 0.2206,
14
  "step": 500
15
  },
16
+ {
17
+ "epoch": 0.82,
18
+ "learning_rate": 1.8351195383347076e-05,
19
+ "loss": 0.1699,
20
+ "step": 1000
21
+ },
22
  {
23
  "epoch": 1.0,
24
+ "eval_accuracy": 0.9739486703772418,
25
+ "eval_loss": 0.07855913788080215,
26
+ "eval_runtime": 265.3419,
27
+ "eval_samples_per_second": 292.513,
28
+ "eval_steps_per_second": 4.571,
29
+ "step": 1213
30
+ },
31
+ {
32
+ "epoch": 1.24,
33
+ "learning_rate": 1.752679307502061e-05,
34
+ "loss": 0.118,
35
+ "step": 1500
36
  },
37
  {
38
  "epoch": 1.65,
39
+ "learning_rate": 1.6702390766694146e-05,
40
+ "loss": 0.0991,
41
+ "step": 2000
42
  },
43
  {
44
  "epoch": 2.0,
45
+ "eval_accuracy": 0.9866393527107813,
46
+ "eval_loss": 0.04234647750854492,
47
+ "eval_runtime": 267.207,
48
+ "eval_samples_per_second": 290.471,
49
+ "eval_steps_per_second": 4.54,
50
+ "step": 2426
51
+ },
52
+ {
53
+ "epoch": 2.06,
54
+ "learning_rate": 1.5877988458367685e-05,
55
+ "loss": 0.0979,
56
+ "step": 2500
57
  },
58
  {
59
  "epoch": 2.47,
60
+ "learning_rate": 1.5053586150041222e-05,
61
+ "loss": 0.0573,
62
+ "step": 3000
63
+ },
64
+ {
65
+ "epoch": 2.89,
66
+ "learning_rate": 1.4229183841714757e-05,
67
+ "loss": 0.0653,
68
+ "step": 3500
69
  },
70
  {
71
  "epoch": 3.0,
72
+ "eval_accuracy": 0.9903241599670171,
73
+ "eval_loss": 0.029069583863019943,
74
+ "eval_runtime": 266.0985,
75
+ "eval_samples_per_second": 291.681,
76
+ "eval_steps_per_second": 4.558,
77
+ "step": 3639
78
  },
79
  {
80
+ "epoch": 3.3,
81
+ "learning_rate": 1.3404781533388294e-05,
82
+ "loss": 0.0492,
83
+ "step": 4000
84
+ },
85
+ {
86
+ "epoch": 3.71,
87
+ "learning_rate": 1.2580379225061831e-05,
88
+ "loss": 0.0442,
89
+ "step": 4500
90
  },
91
  {
92
  "epoch": 4.0,
93
+ "eval_accuracy": 0.9942151102865389,
94
+ "eval_loss": 0.01643509231507778,
95
+ "eval_runtime": 266.3375,
96
+ "eval_samples_per_second": 291.42,
97
+ "eval_steps_per_second": 4.554,
98
+ "step": 4852
99
  },
100
  {
101
  "epoch": 4.12,
102
+ "learning_rate": 1.1755976916735368e-05,
103
+ "loss": 0.0361,
104
+ "step": 5000
105
  },
106
  {
107
+ "epoch": 4.53,
108
+ "learning_rate": 1.0931574608408903e-05,
109
+ "loss": 0.0296,
110
+ "step": 5500
111
+ },
112
+ {
113
+ "epoch": 4.95,
114
+ "learning_rate": 1.0107172300082442e-05,
115
+ "loss": 0.0315,
116
+ "step": 6000
117
  },
118
  {
119
  "epoch": 5.0,
120
+ "eval_accuracy": 0.9958256029684601,
121
+ "eval_loss": 0.010281954891979694,
122
+ "eval_runtime": 266.335,
123
+ "eval_samples_per_second": 291.422,
124
+ "eval_steps_per_second": 4.554,
125
+ "step": 6065
126
+ },
127
+ {
128
+ "epoch": 5.36,
129
+ "learning_rate": 9.282769991755977e-06,
130
+ "loss": 0.0189,
131
+ "step": 6500
132
  },
133
  {
134
  "epoch": 5.77,
135
+ "learning_rate": 8.458367683429514e-06,
136
+ "loss": 0.0207,
137
+ "step": 7000
138
  },
139
  {
140
  "epoch": 6.0,
141
+ "eval_accuracy": 0.9973587920016491,
142
+ "eval_loss": 0.0071565331891179085,
143
+ "eval_runtime": 266.5437,
144
+ "eval_samples_per_second": 291.194,
145
+ "eval_steps_per_second": 4.551,
146
+ "step": 7278
147
  },
148
  {
149
+ "epoch": 6.18,
150
+ "learning_rate": 7.633965375103051e-06,
151
+ "loss": 0.0171,
152
+ "step": 7500
153
+ },
154
+ {
155
+ "epoch": 6.6,
156
+ "learning_rate": 6.809563066776587e-06,
157
+ "loss": 0.0133,
158
+ "step": 8000
159
  },
160
  {
161
  "epoch": 7.0,
162
+ "eval_accuracy": 0.9979514533085961,
163
+ "eval_loss": 0.004438882227987051,
164
+ "eval_runtime": 266.6565,
165
+ "eval_samples_per_second": 291.071,
166
+ "eval_steps_per_second": 4.549,
167
+ "step": 8491
168
  },
169
  {
170
+ "epoch": 7.01,
171
+ "learning_rate": 5.9851607584501235e-06,
172
+ "loss": 0.0152,
173
+ "step": 8500
174
+ },
175
+ {
176
+ "epoch": 7.42,
177
+ "learning_rate": 5.1607584501236605e-06,
178
+ "loss": 0.0082,
179
+ "step": 9000
180
+ },
181
+ {
182
+ "epoch": 7.83,
183
+ "learning_rate": 4.3363561417971975e-06,
184
+ "loss": 0.0109,
185
+ "step": 9500
186
  },
187
  {
188
  "epoch": 8.0,
189
+ "eval_accuracy": 0.9985183467326324,
190
+ "eval_loss": 0.0034066857770085335,
191
+ "eval_runtime": 266.0238,
192
+ "eval_samples_per_second": 291.763,
193
+ "eval_steps_per_second": 4.56,
194
+ "step": 9704
195
  },
196
  {
197
  "epoch": 8.24,
198
+ "learning_rate": 3.5119538334707345e-06,
199
+ "loss": 0.0077,
200
+ "step": 10000
201
+ },
202
+ {
203
+ "epoch": 8.66,
204
+ "learning_rate": 2.6875515251442706e-06,
205
+ "loss": 0.0059,
206
+ "step": 10500
207
  },
208
  {
209
  "epoch": 9.0,
210
+ "eval_accuracy": 0.9989177489177489,
211
+ "eval_loss": 0.0024579998571425676,
212
+ "eval_runtime": 264.7367,
213
+ "eval_samples_per_second": 293.182,
214
+ "eval_steps_per_second": 4.582,
215
+ "step": 10917
216
  },
217
  {
218
+ "epoch": 9.07,
219
+ "learning_rate": 1.8631492168178072e-06,
220
+ "loss": 0.0077,
221
+ "step": 11000
222
  },
223
  {
224
+ "epoch": 9.48,
225
+ "learning_rate": 1.038746908491344e-06,
226
+ "loss": 0.0033,
227
+ "step": 11500
228
+ },
229
+ {
230
+ "epoch": 9.89,
231
+ "learning_rate": 2.1434460016488048e-07,
232
+ "loss": 0.0043,
233
+ "step": 12000
234
  },
235
  {
236
  "epoch": 10.0,
237
+ "eval_accuracy": 0.9992398474541332,
238
+ "eval_loss": 0.0018210469279438257,
239
+ "eval_runtime": 264.6034,
240
+ "eval_samples_per_second": 293.33,
241
+ "eval_steps_per_second": 4.584,
242
+ "step": 12130
243
  }
244
  ],
245
+ "max_steps": 12130,
246
  "num_train_epochs": 10,
247
  "total_flos": 1.8083196174016512e+17,
248
  "trial_name": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe128dfde7edbd11404fb7a18d1c22f32efb6d12770f4d5ee4a9f8f1faabbd70
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:767854cf2766e62aff85fa5b0170bb26f740c4b5f2388312badb7574061ef623
3
  size 3439