DewiBrynJones commited on
Commit
66894f6
1 Parent(s): ecd1b6b

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: DewiBrynJones/wav2vec2-xlsr-53-ft-btb-cv-cy
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
 
2
  license: apache-2.0
3
  base_model: DewiBrynJones/wav2vec2-xlsr-53-ft-btb-cv-cy
4
  tags:
5
+ - automatic-speech-recognition
6
+ - ./data-configs/cv.json
7
  - generated_from_trainer
8
  metrics:
9
  - wer
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.020080321285141,
3
+ "eval_loss": 0.2515573501586914,
4
+ "eval_runtime": 200.0113,
5
+ "eval_samples": 5381,
6
+ "eval_samples_per_second": 26.903,
7
+ "eval_steps_per_second": 0.425,
8
+ "eval_wer": 0.24033088747042097,
9
+ "total_flos": 6.356146932571761e+18,
10
+ "train_loss": 0.3559208065032959,
11
+ "train_runtime": 12994.9936,
12
+ "train_samples": 7968,
13
+ "train_samples_per_second": 3.078,
14
+ "train_steps_per_second": 0.77
15
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.020080321285141,
3
+ "eval_loss": 0.2515573501586914,
4
+ "eval_runtime": 200.0113,
5
+ "eval_samples": 5381,
6
+ "eval_samples_per_second": 26.903,
7
+ "eval_steps_per_second": 0.425,
8
+ "eval_wer": 0.24033088747042097
9
+ }
runs/Oct04_07-33-11_a3cadab5f73a/events.out.tfevents.1728037286.a3cadab5f73a.29.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b300327309affd6dd65c72ca0a53b29a3c009fa29d4b469e4cee0ba5e2c9d898
3
+ size 406
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.020080321285141,
3
+ "total_flos": 6.356146932571761e+18,
4
+ "train_loss": 0.3559208065032959,
5
+ "train_runtime": 12994.9936,
6
+ "train_samples": 7968,
7
+ "train_samples_per_second": 3.078,
8
+ "train_steps_per_second": 0.77
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,632 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.020080321285141,
5
+ "eval_steps": 200,
6
+ "global_step": 10000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.10040160642570281,
13
+ "eval_loss": 0.3807084858417511,
14
+ "eval_runtime": 206.6442,
15
+ "eval_samples_per_second": 26.04,
16
+ "eval_steps_per_second": 0.411,
17
+ "eval_wer": 0.2514466384298753,
18
+ "step": 200
19
+ },
20
+ {
21
+ "epoch": 0.20080321285140562,
22
+ "eval_loss": 0.253967821598053,
23
+ "eval_runtime": 206.4967,
24
+ "eval_samples_per_second": 26.059,
25
+ "eval_steps_per_second": 0.412,
26
+ "eval_wer": 0.2642923899858816,
27
+ "step": 400
28
+ },
29
+ {
30
+ "epoch": 0.25100401606425704,
31
+ "grad_norm": 1.857898235321045,
32
+ "learning_rate": 0.0001491,
33
+ "loss": 2.4874,
34
+ "step": 500
35
+ },
36
+ {
37
+ "epoch": 0.30120481927710846,
38
+ "eval_loss": 0.2642447352409363,
39
+ "eval_runtime": 205.9594,
40
+ "eval_samples_per_second": 26.127,
41
+ "eval_steps_per_second": 0.413,
42
+ "eval_wer": 0.30376424267732505,
43
+ "step": 600
44
+ },
45
+ {
46
+ "epoch": 0.40160642570281124,
47
+ "eval_loss": 0.3125462234020233,
48
+ "eval_runtime": 205.1387,
49
+ "eval_samples_per_second": 26.231,
50
+ "eval_steps_per_second": 0.414,
51
+ "eval_wer": 0.39048300821253157,
52
+ "step": 800
53
+ },
54
+ {
55
+ "epoch": 0.5020080321285141,
56
+ "grad_norm": 3.634869337081909,
57
+ "learning_rate": 0.00029909999999999995,
58
+ "loss": 0.3991,
59
+ "step": 1000
60
+ },
61
+ {
62
+ "epoch": 0.5020080321285141,
63
+ "eval_loss": 0.3531426191329956,
64
+ "eval_runtime": 205.1455,
65
+ "eval_samples_per_second": 26.23,
66
+ "eval_steps_per_second": 0.414,
67
+ "eval_wer": 0.3939430094056354,
68
+ "step": 1000
69
+ },
70
+ {
71
+ "epoch": 0.6024096385542169,
72
+ "eval_loss": 0.3571958839893341,
73
+ "eval_runtime": 209.3856,
74
+ "eval_samples_per_second": 25.699,
75
+ "eval_steps_per_second": 0.406,
76
+ "eval_wer": 0.40390542663405515,
77
+ "step": 1200
78
+ },
79
+ {
80
+ "epoch": 0.7028112449799196,
81
+ "eval_loss": 0.36791086196899414,
82
+ "eval_runtime": 206.8164,
83
+ "eval_samples_per_second": 26.018,
84
+ "eval_steps_per_second": 0.411,
85
+ "eval_wer": 0.4052576110083716,
86
+ "step": 1400
87
+ },
88
+ {
89
+ "epoch": 0.7530120481927711,
90
+ "grad_norm": 3.5483558177948,
91
+ "learning_rate": 0.0002834333333333333,
92
+ "loss": 0.4512,
93
+ "step": 1500
94
+ },
95
+ {
96
+ "epoch": 0.8032128514056225,
97
+ "eval_loss": 0.35897189378738403,
98
+ "eval_runtime": 207.7252,
99
+ "eval_samples_per_second": 25.904,
100
+ "eval_steps_per_second": 0.409,
101
+ "eval_wer": 0.38767921414225776,
102
+ "step": 1600
103
+ },
104
+ {
105
+ "epoch": 0.9036144578313253,
106
+ "eval_loss": 0.3732704222202301,
107
+ "eval_runtime": 205.6494,
108
+ "eval_samples_per_second": 26.166,
109
+ "eval_steps_per_second": 0.413,
110
+ "eval_wer": 0.4006840462128895,
111
+ "step": 1800
112
+ },
113
+ {
114
+ "epoch": 1.0040160642570282,
115
+ "grad_norm": 2.2905380725860596,
116
+ "learning_rate": 0.00026676666666666663,
117
+ "loss": 0.4333,
118
+ "step": 2000
119
+ },
120
+ {
121
+ "epoch": 1.0040160642570282,
122
+ "eval_loss": 0.377088725566864,
123
+ "eval_runtime": 204.613,
124
+ "eval_samples_per_second": 26.298,
125
+ "eval_steps_per_second": 0.415,
126
+ "eval_wer": 0.4243273876990992,
127
+ "step": 2000
128
+ },
129
+ {
130
+ "epoch": 1.104417670682731,
131
+ "eval_loss": 0.3604430556297302,
132
+ "eval_runtime": 208.0965,
133
+ "eval_samples_per_second": 25.858,
134
+ "eval_steps_per_second": 0.408,
135
+ "eval_wer": 0.3867048459901768,
136
+ "step": 2200
137
+ },
138
+ {
139
+ "epoch": 1.2048192771084336,
140
+ "eval_loss": 0.3431110978126526,
141
+ "eval_runtime": 206.2637,
142
+ "eval_samples_per_second": 26.088,
143
+ "eval_steps_per_second": 0.412,
144
+ "eval_wer": 0.38137564875022373,
145
+ "step": 2400
146
+ },
147
+ {
148
+ "epoch": 1.2550200803212852,
149
+ "grad_norm": 1.8699342012405396,
150
+ "learning_rate": 0.00025009999999999995,
151
+ "loss": 0.3468,
152
+ "step": 2500
153
+ },
154
+ {
155
+ "epoch": 1.3052208835341366,
156
+ "eval_loss": 0.32902058959007263,
157
+ "eval_runtime": 205.4777,
158
+ "eval_samples_per_second": 26.188,
159
+ "eval_steps_per_second": 0.414,
160
+ "eval_wer": 0.3778559923641353,
161
+ "step": 2600
162
+ },
163
+ {
164
+ "epoch": 1.4056224899598393,
165
+ "eval_loss": 0.33407700061798096,
166
+ "eval_runtime": 205.6359,
167
+ "eval_samples_per_second": 26.168,
168
+ "eval_steps_per_second": 0.413,
169
+ "eval_wer": 0.3647119648432063,
170
+ "step": 2800
171
+ },
172
+ {
173
+ "epoch": 1.5060240963855422,
174
+ "grad_norm": 2.062389373779297,
175
+ "learning_rate": 0.0002334333333333333,
176
+ "loss": 0.3503,
177
+ "step": 3000
178
+ },
179
+ {
180
+ "epoch": 1.5060240963855422,
181
+ "eval_loss": 0.3247535228729248,
182
+ "eval_runtime": 206.3116,
183
+ "eval_samples_per_second": 26.082,
184
+ "eval_steps_per_second": 0.412,
185
+ "eval_wer": 0.3614706993577124,
186
+ "step": 3000
187
+ },
188
+ {
189
+ "epoch": 1.606425702811245,
190
+ "eval_loss": 0.33116209506988525,
191
+ "eval_runtime": 203.9912,
192
+ "eval_samples_per_second": 26.379,
193
+ "eval_steps_per_second": 0.417,
194
+ "eval_wer": 0.35512736383702204,
195
+ "step": 3200
196
+ },
197
+ {
198
+ "epoch": 1.7068273092369477,
199
+ "eval_loss": 0.3410908281803131,
200
+ "eval_runtime": 204.5054,
201
+ "eval_samples_per_second": 26.312,
202
+ "eval_steps_per_second": 0.416,
203
+ "eval_wer": 0.3836226610193084,
204
+ "step": 3400
205
+ },
206
+ {
207
+ "epoch": 1.7570281124497993,
208
+ "grad_norm": 0.9907544255256653,
209
+ "learning_rate": 0.00021679999999999998,
210
+ "loss": 0.3418,
211
+ "step": 3500
212
+ },
213
+ {
214
+ "epoch": 1.8072289156626506,
215
+ "eval_loss": 0.3116574287414551,
216
+ "eval_runtime": 205.0392,
217
+ "eval_samples_per_second": 26.244,
218
+ "eval_steps_per_second": 0.415,
219
+ "eval_wer": 0.33752908190658,
220
+ "step": 3600
221
+ },
222
+ {
223
+ "epoch": 1.9076305220883534,
224
+ "eval_loss": 0.3196774423122406,
225
+ "eval_runtime": 206.2716,
226
+ "eval_samples_per_second": 26.087,
227
+ "eval_steps_per_second": 0.412,
228
+ "eval_wer": 0.34317644017578397,
229
+ "step": 3800
230
+ },
231
+ {
232
+ "epoch": 2.0080321285140563,
233
+ "grad_norm": 1.0384626388549805,
234
+ "learning_rate": 0.0002001333333333333,
235
+ "loss": 0.3181,
236
+ "step": 4000
237
+ },
238
+ {
239
+ "epoch": 2.0080321285140563,
240
+ "eval_loss": 0.30675315856933594,
241
+ "eval_runtime": 206.0737,
242
+ "eval_samples_per_second": 26.112,
243
+ "eval_steps_per_second": 0.412,
244
+ "eval_wer": 0.3339696553918352,
245
+ "step": 4000
246
+ },
247
+ {
248
+ "epoch": 2.108433734939759,
249
+ "eval_loss": 0.31376445293426514,
250
+ "eval_runtime": 209.2791,
251
+ "eval_samples_per_second": 25.712,
252
+ "eval_steps_per_second": 0.406,
253
+ "eval_wer": 0.3358388514386844,
254
+ "step": 4200
255
+ },
256
+ {
257
+ "epoch": 2.208835341365462,
258
+ "eval_loss": 0.31388720870018005,
259
+ "eval_runtime": 204.9118,
260
+ "eval_samples_per_second": 26.26,
261
+ "eval_steps_per_second": 0.415,
262
+ "eval_wer": 0.3333731034619897,
263
+ "step": 4400
264
+ },
265
+ {
266
+ "epoch": 2.2590361445783134,
267
+ "grad_norm": 0.5868389010429382,
268
+ "learning_rate": 0.00018346666666666664,
269
+ "loss": 0.2423,
270
+ "step": 4500
271
+ },
272
+ {
273
+ "epoch": 2.3092369477911645,
274
+ "eval_loss": 0.3191888928413391,
275
+ "eval_runtime": 204.834,
276
+ "eval_samples_per_second": 26.27,
277
+ "eval_steps_per_second": 0.415,
278
+ "eval_wer": 0.32848137763725666,
279
+ "step": 4600
280
+ },
281
+ {
282
+ "epoch": 2.4096385542168672,
283
+ "eval_loss": 0.2928995192050934,
284
+ "eval_runtime": 204.43,
285
+ "eval_samples_per_second": 26.322,
286
+ "eval_steps_per_second": 0.416,
287
+ "eval_wer": 0.31682872994094136,
288
+ "step": 4800
289
+ },
290
+ {
291
+ "epoch": 2.5100401606425704,
292
+ "grad_norm": 1.3247759342193604,
293
+ "learning_rate": 0.0001668,
294
+ "loss": 0.2327,
295
+ "step": 5000
296
+ },
297
+ {
298
+ "epoch": 2.5100401606425704,
299
+ "eval_loss": 0.29208171367645264,
300
+ "eval_runtime": 206.3612,
301
+ "eval_samples_per_second": 26.076,
302
+ "eval_steps_per_second": 0.412,
303
+ "eval_wer": 0.3103064288412973,
304
+ "step": 5000
305
+ },
306
+ {
307
+ "epoch": 2.610441767068273,
308
+ "eval_loss": 0.2801830470561981,
309
+ "eval_runtime": 204.3678,
310
+ "eval_samples_per_second": 26.33,
311
+ "eval_steps_per_second": 0.416,
312
+ "eval_wer": 0.3037443576129969,
313
+ "step": 5200
314
+ },
315
+ {
316
+ "epoch": 2.710843373493976,
317
+ "eval_loss": 0.2811721861362457,
318
+ "eval_runtime": 204.4403,
319
+ "eval_samples_per_second": 26.321,
320
+ "eval_steps_per_second": 0.416,
321
+ "eval_wer": 0.29624768836127185,
322
+ "step": 5400
323
+ },
324
+ {
325
+ "epoch": 2.7610441767068274,
326
+ "grad_norm": 1.381541132926941,
327
+ "learning_rate": 0.00015013333333333331,
328
+ "loss": 0.2374,
329
+ "step": 5500
330
+ },
331
+ {
332
+ "epoch": 2.8112449799196786,
333
+ "eval_loss": 0.28872984647750854,
334
+ "eval_runtime": 204.3069,
335
+ "eval_samples_per_second": 26.338,
336
+ "eval_steps_per_second": 0.416,
337
+ "eval_wer": 0.30422159915687325,
338
+ "step": 5600
339
+ },
340
+ {
341
+ "epoch": 2.9116465863453813,
342
+ "eval_loss": 0.27397701144218445,
343
+ "eval_runtime": 204.1464,
344
+ "eval_samples_per_second": 26.359,
345
+ "eval_steps_per_second": 0.416,
346
+ "eval_wer": 0.2927081469108553,
347
+ "step": 5800
348
+ },
349
+ {
350
+ "epoch": 3.0120481927710845,
351
+ "grad_norm": 1.4617916345596313,
352
+ "learning_rate": 0.00013346666666666667,
353
+ "loss": 0.2136,
354
+ "step": 6000
355
+ },
356
+ {
357
+ "epoch": 3.0120481927710845,
358
+ "eval_loss": 0.2662462592124939,
359
+ "eval_runtime": 203.8941,
360
+ "eval_samples_per_second": 26.391,
361
+ "eval_steps_per_second": 0.417,
362
+ "eval_wer": 0.28296446539004555,
363
+ "step": 6000
364
+ },
365
+ {
366
+ "epoch": 3.112449799196787,
367
+ "eval_loss": 0.28285130858421326,
368
+ "eval_runtime": 206.1704,
369
+ "eval_samples_per_second": 26.1,
370
+ "eval_steps_per_second": 0.412,
371
+ "eval_wer": 0.2890294100101414,
372
+ "step": 6200
373
+ },
374
+ {
375
+ "epoch": 3.21285140562249,
376
+ "eval_loss": 0.2729070484638214,
377
+ "eval_runtime": 206.2438,
378
+ "eval_samples_per_second": 26.09,
379
+ "eval_steps_per_second": 0.412,
380
+ "eval_wer": 0.28692159319135396,
381
+ "step": 6400
382
+ },
383
+ {
384
+ "epoch": 3.2630522088353415,
385
+ "grad_norm": 0.8870707750320435,
386
+ "learning_rate": 0.00011679999999999998,
387
+ "loss": 0.167,
388
+ "step": 6500
389
+ },
390
+ {
391
+ "epoch": 3.3132530120481927,
392
+ "eval_loss": 0.2776893675327301,
393
+ "eval_runtime": 204.2022,
394
+ "eval_samples_per_second": 26.351,
395
+ "eval_steps_per_second": 0.416,
396
+ "eval_wer": 0.28892998468850045,
397
+ "step": 6600
398
+ },
399
+ {
400
+ "epoch": 3.4136546184738954,
401
+ "eval_loss": 0.2711654603481293,
402
+ "eval_runtime": 203.7376,
403
+ "eval_samples_per_second": 26.411,
404
+ "eval_steps_per_second": 0.417,
405
+ "eval_wer": 0.28095607389289906,
406
+ "step": 6800
407
+ },
408
+ {
409
+ "epoch": 3.5140562248995986,
410
+ "grad_norm": 1.0165985822677612,
411
+ "learning_rate": 0.00010013333333333333,
412
+ "loss": 0.1614,
413
+ "step": 7000
414
+ },
415
+ {
416
+ "epoch": 3.5140562248995986,
417
+ "eval_loss": 0.2688385844230652,
418
+ "eval_runtime": 204.1623,
419
+ "eval_samples_per_second": 26.356,
420
+ "eval_steps_per_second": 0.416,
421
+ "eval_wer": 0.27091411640716656,
422
+ "step": 7000
423
+ },
424
+ {
425
+ "epoch": 3.6144578313253013,
426
+ "eval_loss": 0.2589295208454132,
427
+ "eval_runtime": 205.2749,
428
+ "eval_samples_per_second": 26.214,
429
+ "eval_steps_per_second": 0.414,
430
+ "eval_wer": 0.26626101135437175,
431
+ "step": 7200
432
+ },
433
+ {
434
+ "epoch": 3.714859437751004,
435
+ "eval_loss": 0.26514673233032227,
436
+ "eval_runtime": 204.2135,
437
+ "eval_samples_per_second": 26.35,
438
+ "eval_steps_per_second": 0.416,
439
+ "eval_wer": 0.2669768736701863,
440
+ "step": 7400
441
+ },
442
+ {
443
+ "epoch": 3.765060240963855,
444
+ "grad_norm": 0.7397546172142029,
445
+ "learning_rate": 8.346666666666666e-05,
446
+ "loss": 0.1529,
447
+ "step": 7500
448
+ },
449
+ {
450
+ "epoch": 3.8152610441767068,
451
+ "eval_loss": 0.25074735283851624,
452
+ "eval_runtime": 204.2336,
453
+ "eval_samples_per_second": 26.347,
454
+ "eval_steps_per_second": 0.416,
455
+ "eval_wer": 0.2637157231203643,
456
+ "step": 7600
457
+ },
458
+ {
459
+ "epoch": 3.9156626506024095,
460
+ "eval_loss": 0.2493942528963089,
461
+ "eval_runtime": 206.072,
462
+ "eval_samples_per_second": 26.112,
463
+ "eval_steps_per_second": 0.412,
464
+ "eval_wer": 0.2567957207341566,
465
+ "step": 7800
466
+ },
467
+ {
468
+ "epoch": 4.016064257028113,
469
+ "grad_norm": 0.785851776599884,
470
+ "learning_rate": 6.68e-05,
471
+ "loss": 0.1496,
472
+ "step": 8000
473
+ },
474
+ {
475
+ "epoch": 4.016064257028113,
476
+ "eval_loss": 0.25821030139923096,
477
+ "eval_runtime": 204.9558,
478
+ "eval_samples_per_second": 26.254,
479
+ "eval_steps_per_second": 0.415,
480
+ "eval_wer": 0.2580484797868321,
481
+ "step": 8000
482
+ },
483
+ {
484
+ "epoch": 4.116465863453815,
485
+ "eval_loss": 0.2650238871574402,
486
+ "eval_runtime": 204.7852,
487
+ "eval_samples_per_second": 26.276,
488
+ "eval_steps_per_second": 0.415,
489
+ "eval_wer": 0.25753146811429933,
490
+ "step": 8200
491
+ },
492
+ {
493
+ "epoch": 4.216867469879518,
494
+ "eval_loss": 0.26561084389686584,
495
+ "eval_runtime": 210.1403,
496
+ "eval_samples_per_second": 25.607,
497
+ "eval_steps_per_second": 0.404,
498
+ "eval_wer": 0.25598043309670104,
499
+ "step": 8400
500
+ },
501
+ {
502
+ "epoch": 4.267068273092369,
503
+ "grad_norm": 0.34119465947151184,
504
+ "learning_rate": 5.013333333333332e-05,
505
+ "loss": 0.1128,
506
+ "step": 8500
507
+ },
508
+ {
509
+ "epoch": 4.317269076305221,
510
+ "eval_loss": 0.25430822372436523,
511
+ "eval_runtime": 203.852,
512
+ "eval_samples_per_second": 26.397,
513
+ "eval_steps_per_second": 0.417,
514
+ "eval_wer": 0.25118813259360895,
515
+ "step": 8600
516
+ },
517
+ {
518
+ "epoch": 4.417670682730924,
519
+ "eval_loss": 0.2586837112903595,
520
+ "eval_runtime": 202.7677,
521
+ "eval_samples_per_second": 26.538,
522
+ "eval_steps_per_second": 0.419,
523
+ "eval_wer": 0.24987571834794886,
524
+ "step": 8800
525
+ },
526
+ {
527
+ "epoch": 4.518072289156627,
528
+ "grad_norm": 0.5558347105979919,
529
+ "learning_rate": 3.346666666666666e-05,
530
+ "loss": 0.1109,
531
+ "step": 9000
532
+ },
533
+ {
534
+ "epoch": 4.518072289156627,
535
+ "eval_loss": 0.2540307939052582,
536
+ "eval_runtime": 202.8954,
537
+ "eval_samples_per_second": 26.521,
538
+ "eval_steps_per_second": 0.419,
539
+ "eval_wer": 0.24599813080395316,
540
+ "step": 9000
541
+ },
542
+ {
543
+ "epoch": 4.618473895582329,
544
+ "eval_loss": 0.2546459436416626,
545
+ "eval_runtime": 208.4343,
546
+ "eval_samples_per_second": 25.816,
547
+ "eval_steps_per_second": 0.408,
548
+ "eval_wer": 0.24245858935353656,
549
+ "step": 9200
550
+ },
551
+ {
552
+ "epoch": 4.718875502008032,
553
+ "eval_loss": 0.25800344347953796,
554
+ "eval_runtime": 205.7304,
555
+ "eval_samples_per_second": 26.156,
556
+ "eval_steps_per_second": 0.413,
557
+ "eval_wer": 0.24198134780966016,
558
+ "step": 9400
559
+ },
560
+ {
561
+ "epoch": 4.769076305220883,
562
+ "grad_norm": 0.9226210117340088,
563
+ "learning_rate": 1.68e-05,
564
+ "loss": 0.1028,
565
+ "step": 9500
566
+ },
567
+ {
568
+ "epoch": 4.8192771084337345,
569
+ "eval_loss": 0.25135332345962524,
570
+ "eval_runtime": 203.1799,
571
+ "eval_samples_per_second": 26.484,
572
+ "eval_steps_per_second": 0.418,
573
+ "eval_wer": 0.24035077253474915,
574
+ "step": 9600
575
+ },
576
+ {
577
+ "epoch": 4.919678714859438,
578
+ "eval_loss": 0.2509777843952179,
579
+ "eval_runtime": 203.2596,
580
+ "eval_samples_per_second": 26.474,
581
+ "eval_steps_per_second": 0.418,
582
+ "eval_wer": 0.24025134721310823,
583
+ "step": 9800
584
+ },
585
+ {
586
+ "epoch": 5.020080321285141,
587
+ "grad_norm": 4.224822521209717,
588
+ "learning_rate": 1.3333333333333334e-07,
589
+ "loss": 0.1069,
590
+ "step": 10000
591
+ },
592
+ {
593
+ "epoch": 5.020080321285141,
594
+ "eval_loss": 0.2515573501586914,
595
+ "eval_runtime": 209.1262,
596
+ "eval_samples_per_second": 25.731,
597
+ "eval_steps_per_second": 0.406,
598
+ "eval_wer": 0.24033088747042097,
599
+ "step": 10000
600
+ },
601
+ {
602
+ "epoch": 5.020080321285141,
603
+ "step": 10000,
604
+ "total_flos": 6.356146932571761e+18,
605
+ "train_loss": 0.3559208065032959,
606
+ "train_runtime": 12994.9936,
607
+ "train_samples_per_second": 3.078,
608
+ "train_steps_per_second": 0.77
609
+ }
610
+ ],
611
+ "logging_steps": 500,
612
+ "max_steps": 10000,
613
+ "num_input_tokens_seen": 0,
614
+ "num_train_epochs": 6,
615
+ "save_steps": 500,
616
+ "stateful_callbacks": {
617
+ "TrainerControl": {
618
+ "args": {
619
+ "should_epoch_stop": false,
620
+ "should_evaluate": false,
621
+ "should_log": false,
622
+ "should_save": true,
623
+ "should_training_stop": true
624
+ },
625
+ "attributes": {}
626
+ }
627
+ },
628
+ "total_flos": 6.356146932571761e+18,
629
+ "train_batch_size": 4,
630
+ "trial_name": null,
631
+ "trial_params": null
632
+ }