yuweiiizz committed
Commit 65a9a69 · verified · 1 Parent(s): 530a659

Training in progress, step 2000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:27dfc3f476549bed133cf918ed6f896e43f792a41e630a39487725be9ec39ef0
+ oid sha256:703a6ac7121d730d9c0e42b1a06b45963892be5944000d1f0185ce7044bc3c9e
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ab736d290d4d38ce8584a21f9201c95d1dc4854988ff48ca8afd3e0962a1f95e
+ oid sha256:ddfbaef04fe7f922a8f6475913402834b772a6863710273bbd173e2839db0fd8
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d1f09b1f1f9b06ad2afb12e89fc8695073b76afcf9ea0b3552c7069932117824
+ oid sha256:1877997138fd6c00a4ddcc0ec7e9c019b9f4ccb7a15031d5cedb28ccb7a2c96c
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a51738ca0af55e803ac3fdbc0e3b67846eda4ca13018dd2960c216586e47c984
+ oid sha256:29fb9e79fa30fbb431af919246a50a3118e2599b8f861d1f7ece53767b613869
  size 1064
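
Each of the checkpoint artifacts above is tracked with Git LFS, so the diff only swaps the pointer's sha256 oid while the recorded sizes stay the same. Below is a minimal sketch, assuming the repository has been cloned with the LFS objects pulled, for checking that a local file matches its updated pointer (the local path and the choice of the model.safetensors oid are illustrative assumptions):

import hashlib

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    # Git LFS records the plain SHA-256 of the file contents as the pointer's oid.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# oid taken from the updated model.safetensors pointer in this commit.
expected = "703a6ac7121d730d9c0e42b1a06b45963892be5944000d1f0185ce7044bc3c9e"
actual = lfs_oid("last-checkpoint/model.safetensors")  # assumed local path
print("match" if actual == expected else "mismatch", actual)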
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 61.163904814262146,
- "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-1000",
- "epoch": 0.4,
+ "best_metric": 51.77491557370612,
+ "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-2000",
+ "epoch": 0.8,
  "eval_steps": 1000,
- "global_step": 1000,
+ "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -296,6 +296,295 @@
  "eval_samples_per_second": 2.257,
  "eval_steps_per_second": 0.282,
  "step": 1000
+ },
+ {
+ "epoch": 0.41,
+ "grad_norm": 11.293231964111328,
+ "learning_rate": 8.833333333333334e-06,
+ "loss": 1.153,
+ "step": 1025
+ },
+ {
+ "epoch": 0.42,
+ "grad_norm": 13.314165115356445,
+ "learning_rate": 8.777777777777778e-06,
+ "loss": 1.168,
+ "step": 1050
+ },
+ {
+ "epoch": 0.43,
+ "grad_norm": 13.231385231018066,
+ "learning_rate": 8.722222222222224e-06,
+ "loss": 1.1613,
+ "step": 1075
+ },
+ {
+ "epoch": 0.44,
+ "grad_norm": 13.21717643737793,
+ "learning_rate": 8.666666666666668e-06,
+ "loss": 1.1246,
+ "step": 1100
+ },
+ {
+ "epoch": 0.45,
+ "grad_norm": 11.046935081481934,
+ "learning_rate": 8.611111111111112e-06,
+ "loss": 1.088,
+ "step": 1125
+ },
+ {
+ "epoch": 0.46,
+ "grad_norm": 14.906622886657715,
+ "learning_rate": 8.555555555555556e-06,
+ "loss": 1.19,
+ "step": 1150
+ },
+ {
+ "epoch": 0.47,
+ "grad_norm": 14.302517890930176,
+ "learning_rate": 8.5e-06,
+ "loss": 1.1351,
+ "step": 1175
+ },
+ {
+ "epoch": 0.48,
+ "grad_norm": 13.947770118713379,
+ "learning_rate": 8.444444444444446e-06,
+ "loss": 1.057,
+ "step": 1200
+ },
+ {
+ "epoch": 0.49,
+ "grad_norm": 14.45609188079834,
+ "learning_rate": 8.38888888888889e-06,
+ "loss": 1.0993,
+ "step": 1225
+ },
+ {
+ "epoch": 0.5,
+ "grad_norm": 14.952827453613281,
+ "learning_rate": 8.333333333333334e-06,
+ "loss": 1.1626,
+ "step": 1250
+ },
+ {
+ "epoch": 0.51,
+ "grad_norm": 16.128353118896484,
+ "learning_rate": 8.277777777777778e-06,
+ "loss": 1.1082,
+ "step": 1275
+ },
+ {
+ "epoch": 0.52,
+ "grad_norm": 13.550396919250488,
+ "learning_rate": 8.222222222222222e-06,
+ "loss": 1.183,
+ "step": 1300
+ },
+ {
+ "epoch": 0.53,
+ "grad_norm": 14.400228500366211,
+ "learning_rate": 8.166666666666668e-06,
+ "loss": 1.1988,
+ "step": 1325
+ },
+ {
+ "epoch": 0.54,
+ "grad_norm": 13.9801607131958,
+ "learning_rate": 8.111111111111112e-06,
+ "loss": 1.1314,
+ "step": 1350
+ },
+ {
+ "epoch": 0.55,
+ "grad_norm": 12.84874439239502,
+ "learning_rate": 8.055555555555557e-06,
+ "loss": 1.1411,
+ "step": 1375
+ },
+ {
+ "epoch": 0.56,
+ "grad_norm": 14.126324653625488,
+ "learning_rate": 8.000000000000001e-06,
+ "loss": 1.1314,
+ "step": 1400
+ },
+ {
+ "epoch": 0.57,
+ "grad_norm": 12.402750015258789,
+ "learning_rate": 7.944444444444445e-06,
+ "loss": 1.1071,
+ "step": 1425
+ },
+ {
+ "epoch": 0.58,
+ "grad_norm": 13.835284233093262,
+ "learning_rate": 7.88888888888889e-06,
+ "loss": 1.1393,
+ "step": 1450
+ },
+ {
+ "epoch": 0.59,
+ "grad_norm": 12.414569854736328,
+ "learning_rate": 7.833333333333333e-06,
+ "loss": 1.1026,
+ "step": 1475
+ },
+ {
+ "epoch": 0.6,
+ "grad_norm": 15.43626880645752,
+ "learning_rate": 7.77777777777778e-06,
+ "loss": 1.164,
+ "step": 1500
+ },
+ {
+ "epoch": 0.61,
+ "grad_norm": 13.067487716674805,
+ "learning_rate": 7.722222222222223e-06,
+ "loss": 1.0448,
+ "step": 1525
+ },
+ {
+ "epoch": 0.62,
+ "grad_norm": 14.158551216125488,
+ "learning_rate": 7.666666666666667e-06,
+ "loss": 1.1674,
+ "step": 1550
+ },
+ {
+ "epoch": 0.63,
+ "grad_norm": 13.062005996704102,
+ "learning_rate": 7.611111111111111e-06,
+ "loss": 1.0916,
+ "step": 1575
+ },
+ {
+ "epoch": 0.64,
+ "grad_norm": 13.6104736328125,
+ "learning_rate": 7.555555555555556e-06,
+ "loss": 1.0424,
+ "step": 1600
+ },
+ {
+ "epoch": 0.65,
+ "grad_norm": 11.52835750579834,
+ "learning_rate": 7.500000000000001e-06,
+ "loss": 1.0196,
+ "step": 1625
+ },
+ {
+ "epoch": 0.66,
+ "grad_norm": 14.118935585021973,
+ "learning_rate": 7.444444444444445e-06,
+ "loss": 1.1502,
+ "step": 1650
+ },
+ {
+ "epoch": 0.67,
+ "grad_norm": 13.2473726272583,
+ "learning_rate": 7.38888888888889e-06,
+ "loss": 1.0562,
+ "step": 1675
+ },
+ {
+ "epoch": 0.68,
+ "grad_norm": 13.026944160461426,
+ "learning_rate": 7.333333333333333e-06,
+ "loss": 1.0391,
+ "step": 1700
+ },
+ {
+ "epoch": 0.69,
+ "grad_norm": 11.923539161682129,
+ "learning_rate": 7.277777777777778e-06,
+ "loss": 1.063,
+ "step": 1725
+ },
+ {
+ "epoch": 0.7,
+ "grad_norm": 10.581581115722656,
+ "learning_rate": 7.222222222222223e-06,
+ "loss": 1.0504,
+ "step": 1750
+ },
+ {
+ "epoch": 0.71,
+ "grad_norm": 13.761798858642578,
+ "learning_rate": 7.166666666666667e-06,
+ "loss": 1.1781,
+ "step": 1775
+ },
+ {
+ "epoch": 0.72,
+ "grad_norm": 13.440286636352539,
+ "learning_rate": 7.111111111111112e-06,
+ "loss": 1.088,
+ "step": 1800
+ },
+ {
+ "epoch": 0.73,
+ "grad_norm": 11.378331184387207,
+ "learning_rate": 7.055555555555557e-06,
+ "loss": 1.017,
+ "step": 1825
+ },
+ {
+ "epoch": 0.74,
+ "grad_norm": 16.24916648864746,
+ "learning_rate": 7e-06,
+ "loss": 1.0669,
+ "step": 1850
+ },
+ {
+ "epoch": 0.75,
+ "grad_norm": 14.499041557312012,
+ "learning_rate": 6.944444444444445e-06,
+ "loss": 1.023,
+ "step": 1875
+ },
+ {
+ "epoch": 0.76,
+ "grad_norm": 14.587787628173828,
+ "learning_rate": 6.88888888888889e-06,
+ "loss": 1.1128,
+ "step": 1900
+ },
+ {
+ "epoch": 0.77,
+ "grad_norm": 14.249890327453613,
+ "learning_rate": 6.833333333333334e-06,
+ "loss": 1.0462,
+ "step": 1925
+ },
+ {
+ "epoch": 0.78,
+ "grad_norm": 13.22544002532959,
+ "learning_rate": 6.777777777777779e-06,
+ "loss": 1.0564,
+ "step": 1950
+ },
+ {
+ "epoch": 0.79,
+ "grad_norm": 13.404162406921387,
+ "learning_rate": 6.7222222222222235e-06,
+ "loss": 1.0517,
+ "step": 1975
+ },
+ {
+ "epoch": 0.8,
+ "grad_norm": 13.87370491027832,
+ "learning_rate": 6.666666666666667e-06,
+ "loss": 1.0556,
+ "step": 2000
+ },
+ {
+ "epoch": 0.8,
+ "eval_cer": 51.77491557370612,
+ "eval_loss": 1.0214924812316895,
+ "eval_runtime": 1738.5549,
+ "eval_samples_per_second": 2.264,
+ "eval_steps_per_second": 0.283,
+ "step": 2000
  }
  ],
  "logging_steps": 25,
@@ -303,7 +592,7 @@
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 1000,
- "total_flos": 4.61736640512e+18,
+ "total_flos": 9.23473281024e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null