yuweiiizz committed (verified)
Commit a3a8187 · 1 parent: 0487418

Training in progress, step 6000, checkpoint

last-checkpoint/config.json CHANGED
@@ -45,7 +45,7 @@
  "scale_embedding": false,
  "suppress_tokens": [],
  "torch_dtype": "float32",
- "transformers_version": "4.40.1",
+ "transformers_version": "4.40.2",
  "use_cache": false,
  "use_weighted_layer_sum": false,
  "vocab_size": 51865
last-checkpoint/generation_config.json CHANGED
@@ -262,5 +262,5 @@
  "transcribe": 50359,
  "translate": 50358
  },
- "transformers_version": "4.40.1"
+ "transformers_version": "4.40.2"
  }
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0351867dc343cc644a28fc7f80c1d8e42d179bfa7434369ea19f3aff0a0e16ec
+ oid sha256:9ec0246d4e2aea1b71a33338e4420dd5d8c26630b4c1753f038e7f2036aad545
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ee26c72857b99db7819dd149497f03225610ad437f9f2abb41a02876eef42e29
+ oid sha256:cdc53657d0d3de6712008710ba891fd0a388a380e3678a28f24c312f466e7db5
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fce579953b334f6cfba9152781b4b5b016a30a4024c41dd066d03ae60bfaddc7
+ oid sha256:01936a26df76d30ee6550fdbb203f4526dab703ccbf83b9464caef2a32f84a5b
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3d6123b92a1a964482602c80eafadd952483e879e0467db7e659b5938ddcc1ab
+ oid sha256:8bdbe69a1efdf2a6b7b8df096446c782edc1c5607aae343868c0f0cf62a3941a
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 50.39660724102725,
- "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-4000",
- "epoch": 2.0,
+ "best_metric": 49.85901151405969,
+ "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-6000",
+ "epoch": 2.4,
  "eval_steps": 1000,
- "global_step": 5000,
+ "global_step": 6000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1452,14 +1452,303 @@
  "eval_samples_per_second": 2.246,
  "eval_steps_per_second": 0.281,
  "step": 5000
+ },
+ {
+ "epoch": 2.01,
+ "grad_norm": 12.426376342773438,
+ "learning_rate": 4.907407407407408e-06,
+ "loss": 0.5823,
+ "step": 5025
+ },
+ {
+ "epoch": 2.02,
+ "grad_norm": 10.951026916503906,
+ "learning_rate": 4.876543209876544e-06,
+ "loss": 0.6085,
+ "step": 5050
+ },
+ {
+ "epoch": 2.03,
+ "grad_norm": 10.53141975402832,
+ "learning_rate": 4.845679012345679e-06,
+ "loss": 0.5771,
+ "step": 5075
+ },
+ {
+ "epoch": 2.04,
+ "grad_norm": 13.22917366027832,
+ "learning_rate": 4.814814814814815e-06,
+ "loss": 0.6013,
+ "step": 5100
+ },
+ {
+ "epoch": 2.05,
+ "grad_norm": 12.245888710021973,
+ "learning_rate": 4.783950617283951e-06,
+ "loss": 0.559,
+ "step": 5125
+ },
+ {
+ "epoch": 2.06,
+ "grad_norm": 10.822369575500488,
+ "learning_rate": 4.753086419753087e-06,
+ "loss": 0.6044,
+ "step": 5150
+ },
+ {
+ "epoch": 2.07,
+ "grad_norm": 13.181960105895996,
+ "learning_rate": 4.722222222222222e-06,
+ "loss": 0.6063,
+ "step": 5175
+ },
+ {
+ "epoch": 2.08,
+ "grad_norm": 10.481088638305664,
+ "learning_rate": 4.691358024691358e-06,
+ "loss": 0.572,
+ "step": 5200
+ },
+ {
+ "epoch": 2.09,
+ "grad_norm": 10.6892671585083,
+ "learning_rate": 4.660493827160494e-06,
+ "loss": 0.6041,
+ "step": 5225
+ },
+ {
+ "epoch": 2.1,
+ "grad_norm": 11.274572372436523,
+ "learning_rate": 4.62962962962963e-06,
+ "loss": 0.5608,
+ "step": 5250
+ },
+ {
+ "epoch": 2.11,
+ "grad_norm": 11.774944305419922,
+ "learning_rate": 4.598765432098766e-06,
+ "loss": 0.6163,
+ "step": 5275
+ },
+ {
+ "epoch": 2.12,
+ "grad_norm": 10.94110107421875,
+ "learning_rate": 4.567901234567902e-06,
+ "loss": 0.5816,
+ "step": 5300
+ },
+ {
+ "epoch": 2.13,
+ "grad_norm": 10.177285194396973,
+ "learning_rate": 4.537037037037038e-06,
+ "loss": 0.557,
+ "step": 5325
+ },
+ {
+ "epoch": 2.14,
+ "grad_norm": 9.426462173461914,
+ "learning_rate": 4.506172839506173e-06,
+ "loss": 0.6411,
+ "step": 5350
+ },
+ {
+ "epoch": 2.15,
+ "grad_norm": 9.961886405944824,
+ "learning_rate": 4.475308641975309e-06,
+ "loss": 0.5906,
+ "step": 5375
+ },
+ {
+ "epoch": 2.16,
+ "grad_norm": 11.078606605529785,
+ "learning_rate": 4.444444444444444e-06,
+ "loss": 0.622,
+ "step": 5400
+ },
+ {
+ "epoch": 2.17,
+ "grad_norm": 10.944733619689941,
+ "learning_rate": 4.413580246913581e-06,
+ "loss": 0.6154,
+ "step": 5425
+ },
+ {
+ "epoch": 2.18,
+ "grad_norm": 10.523876190185547,
+ "learning_rate": 4.382716049382716e-06,
+ "loss": 0.6698,
+ "step": 5450
+ },
+ {
+ "epoch": 2.19,
+ "grad_norm": 12.090290069580078,
+ "learning_rate": 4.351851851851852e-06,
+ "loss": 0.6269,
+ "step": 5475
+ },
+ {
+ "epoch": 2.2,
+ "grad_norm": 9.12374210357666,
+ "learning_rate": 4.3209876543209875e-06,
+ "loss": 0.6209,
+ "step": 5500
+ },
+ {
+ "epoch": 2.21,
+ "grad_norm": 11.668550491333008,
+ "learning_rate": 4.290123456790124e-06,
+ "loss": 0.6692,
+ "step": 5525
+ },
+ {
+ "epoch": 2.22,
+ "grad_norm": 13.481975555419922,
+ "learning_rate": 4.2592592592592596e-06,
+ "loss": 0.6093,
+ "step": 5550
+ },
+ {
+ "epoch": 2.23,
+ "grad_norm": 12.388972282409668,
+ "learning_rate": 4.228395061728396e-06,
+ "loss": 0.5941,
+ "step": 5575
+ },
+ {
+ "epoch": 2.24,
+ "grad_norm": 9.559195518493652,
+ "learning_rate": 4.197530864197531e-06,
+ "loss": 0.5782,
+ "step": 5600
+ },
+ {
+ "epoch": 2.25,
+ "grad_norm": 10.876689910888672,
+ "learning_rate": 4.166666666666667e-06,
+ "loss": 0.5896,
+ "step": 5625
+ },
+ {
+ "epoch": 2.26,
+ "grad_norm": 10.03794002532959,
+ "learning_rate": 4.135802469135803e-06,
+ "loss": 0.6243,
+ "step": 5650
+ },
+ {
+ "epoch": 2.27,
+ "grad_norm": 11.717126846313477,
+ "learning_rate": 4.104938271604938e-06,
+ "loss": 0.6205,
+ "step": 5675
+ },
+ {
+ "epoch": 2.2800000000000002,
+ "grad_norm": 13.434377670288086,
+ "learning_rate": 4.074074074074074e-06,
+ "loss": 0.6419,
+ "step": 5700
+ },
+ {
+ "epoch": 2.29,
+ "grad_norm": 8.062933921813965,
+ "learning_rate": 4.04320987654321e-06,
+ "loss": 0.6042,
+ "step": 5725
+ },
+ {
+ "epoch": 2.3,
+ "grad_norm": 11.512144088745117,
+ "learning_rate": 4.012345679012346e-06,
+ "loss": 0.5766,
+ "step": 5750
+ },
+ {
+ "epoch": 2.31,
+ "grad_norm": 12.437589645385742,
+ "learning_rate": 3.9814814814814814e-06,
+ "loss": 0.6139,
+ "step": 5775
+ },
+ {
+ "epoch": 2.32,
+ "grad_norm": 13.863161087036133,
+ "learning_rate": 3.9506172839506175e-06,
+ "loss": 0.6477,
+ "step": 5800
+ },
+ {
+ "epoch": 2.33,
+ "grad_norm": 12.266985893249512,
+ "learning_rate": 3.9197530864197535e-06,
+ "loss": 0.5743,
+ "step": 5825
+ },
+ {
+ "epoch": 2.34,
+ "grad_norm": 9.438658714294434,
+ "learning_rate": 3.88888888888889e-06,
+ "loss": 0.5324,
+ "step": 5850
+ },
+ {
+ "epoch": 2.35,
+ "grad_norm": 10.063249588012695,
+ "learning_rate": 3.858024691358025e-06,
+ "loss": 0.5516,
+ "step": 5875
+ },
+ {
+ "epoch": 2.36,
+ "grad_norm": 11.568090438842773,
+ "learning_rate": 3.827160493827161e-06,
+ "loss": 0.6009,
+ "step": 5900
+ },
+ {
+ "epoch": 2.37,
+ "grad_norm": 12.200023651123047,
+ "learning_rate": 3.796296296296297e-06,
+ "loss": 0.5942,
+ "step": 5925
+ },
+ {
+ "epoch": 2.38,
+ "grad_norm": 9.803306579589844,
+ "learning_rate": 3.7654320987654325e-06,
+ "loss": 0.5866,
+ "step": 5950
+ },
+ {
+ "epoch": 2.39,
+ "grad_norm": 10.597114562988281,
+ "learning_rate": 3.734567901234568e-06,
+ "loss": 0.6058,
+ "step": 5975
+ },
+ {
+ "epoch": 2.4,
+ "grad_norm": 11.030874252319336,
+ "learning_rate": 3.7037037037037037e-06,
+ "loss": 0.6211,
+ "step": 6000
+ },
+ {
+ "epoch": 2.4,
+ "eval_cer": 49.85901151405969,
+ "eval_loss": 0.9176779985427856,
+ "eval_runtime": 1772.9346,
+ "eval_samples_per_second": 2.22,
+ "eval_steps_per_second": 0.278,
+ "step": 6000
  }
  ],
  "logging_steps": 25,
- "max_steps": 5000,
+ "max_steps": 9000,
  "num_input_tokens_seen": 0,
- "num_train_epochs": 2,
+ "num_train_epochs": 4,
  "save_steps": 1000,
- "total_flos": 2.30868320256e+19,
+ "total_flos": 2.770419843072e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7c3e5a033637a03b3c288e7f28b5f4eb2b2c22389eb467e77f64839f3948fe5f
+ oid sha256:239dc06417f57db987ec4b8fd6d7236222d6262229a2d5b5d78a349f1ba73d59
  size 5176