common6 / trainer_state.json
ghofrani's picture
End of training
6e66041
raw
history blame
63.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 99.99750623441396,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 6e-06,
"loss": 1.0901,
"step": 10
},
{
"epoch": 0.4,
"learning_rate": 1.2e-05,
"loss": 1.0728,
"step": 20
},
{
"epoch": 0.6,
"learning_rate": 1.8e-05,
"loss": 1.0838,
"step": 30
},
{
"epoch": 0.8,
"learning_rate": 2.4e-05,
"loss": 1.0591,
"step": 40
},
{
"epoch": 1.0,
"learning_rate": 3e-05,
"loss": 1.0419,
"step": 50
},
{
"epoch": 1.2,
"learning_rate": 3.6e-05,
"loss": 1.0973,
"step": 60
},
{
"epoch": 1.4,
"learning_rate": 4.2e-05,
"loss": 1.07,
"step": 70
},
{
"epoch": 1.6,
"learning_rate": 4.8e-05,
"loss": 1.0816,
"step": 80
},
{
"epoch": 1.8,
"learning_rate": 5.4000000000000005e-05,
"loss": 1.0822,
"step": 90
},
{
"epoch": 2.0,
"learning_rate": 6e-05,
"loss": 1.0825,
"step": 100
},
{
"epoch": 2.2,
"learning_rate": 5.9877551020408165e-05,
"loss": 1.1134,
"step": 110
},
{
"epoch": 2.4,
"learning_rate": 5.975510204081633e-05,
"loss": 1.1079,
"step": 120
},
{
"epoch": 2.6,
"learning_rate": 5.963265306122449e-05,
"loss": 1.1017,
"step": 130
},
{
"epoch": 2.8,
"learning_rate": 5.9510204081632654e-05,
"loss": 1.1001,
"step": 140
},
{
"epoch": 3.0,
"learning_rate": 5.938775510204082e-05,
"loss": 1.0689,
"step": 150
},
{
"epoch": 3.2,
"learning_rate": 5.926530612244898e-05,
"loss": 1.1165,
"step": 160
},
{
"epoch": 3.4,
"learning_rate": 5.914285714285715e-05,
"loss": 1.0812,
"step": 170
},
{
"epoch": 3.6,
"learning_rate": 5.9020408163265306e-05,
"loss": 1.0963,
"step": 180
},
{
"epoch": 3.8,
"learning_rate": 5.889795918367347e-05,
"loss": 1.0757,
"step": 190
},
{
"epoch": 4.0,
"learning_rate": 5.877551020408163e-05,
"loss": 1.0759,
"step": 200
},
{
"epoch": 4.2,
"learning_rate": 5.8653061224489795e-05,
"loss": 1.1081,
"step": 210
},
{
"epoch": 4.4,
"learning_rate": 5.8530612244897965e-05,
"loss": 1.0893,
"step": 220
},
{
"epoch": 4.6,
"learning_rate": 5.840816326530613e-05,
"loss": 1.0849,
"step": 230
},
{
"epoch": 4.8,
"learning_rate": 5.8285714285714284e-05,
"loss": 1.0753,
"step": 240
},
{
"epoch": 5.0,
"learning_rate": 5.816326530612245e-05,
"loss": 1.0611,
"step": 250
},
{
"epoch": 5.2,
"learning_rate": 5.804081632653062e-05,
"loss": 1.0977,
"step": 260
},
{
"epoch": 5.4,
"learning_rate": 5.791836734693878e-05,
"loss": 1.0702,
"step": 270
},
{
"epoch": 5.6,
"learning_rate": 5.779591836734694e-05,
"loss": 1.0886,
"step": 280
},
{
"epoch": 5.8,
"learning_rate": 5.76734693877551e-05,
"loss": 1.073,
"step": 290
},
{
"epoch": 6.0,
"learning_rate": 5.755102040816326e-05,
"loss": 1.0663,
"step": 300
},
{
"epoch": 6.2,
"learning_rate": 5.742857142857143e-05,
"loss": 1.0989,
"step": 310
},
{
"epoch": 6.4,
"learning_rate": 5.7306122448979596e-05,
"loss": 1.0773,
"step": 320
},
{
"epoch": 6.6,
"learning_rate": 5.718367346938776e-05,
"loss": 1.0682,
"step": 330
},
{
"epoch": 6.8,
"learning_rate": 5.7061224489795915e-05,
"loss": 1.0759,
"step": 340
},
{
"epoch": 7.0,
"learning_rate": 5.6938775510204085e-05,
"loss": 1.0497,
"step": 350
},
{
"epoch": 7.2,
"learning_rate": 5.681632653061225e-05,
"loss": 1.0923,
"step": 360
},
{
"epoch": 7.4,
"learning_rate": 5.669387755102041e-05,
"loss": 1.0503,
"step": 370
},
{
"epoch": 7.6,
"learning_rate": 5.6571428571428574e-05,
"loss": 1.0539,
"step": 380
},
{
"epoch": 7.8,
"learning_rate": 5.644897959183674e-05,
"loss": 1.0693,
"step": 390
},
{
"epoch": 8.0,
"learning_rate": 5.63265306122449e-05,
"loss": 1.0338,
"step": 400
},
{
"epoch": 8.2,
"learning_rate": 5.620408163265306e-05,
"loss": 1.0612,
"step": 410
},
{
"epoch": 8.4,
"learning_rate": 5.6081632653061226e-05,
"loss": 1.0647,
"step": 420
},
{
"epoch": 8.6,
"learning_rate": 5.595918367346939e-05,
"loss": 1.064,
"step": 430
},
{
"epoch": 8.8,
"learning_rate": 5.583673469387755e-05,
"loss": 1.0591,
"step": 440
},
{
"epoch": 9.0,
"learning_rate": 5.5714285714285715e-05,
"loss": 1.0207,
"step": 450
},
{
"epoch": 9.2,
"learning_rate": 5.559183673469388e-05,
"loss": 1.0733,
"step": 460
},
{
"epoch": 9.4,
"learning_rate": 5.546938775510204e-05,
"loss": 1.0442,
"step": 470
},
{
"epoch": 9.6,
"learning_rate": 5.5346938775510204e-05,
"loss": 1.0697,
"step": 480
},
{
"epoch": 9.8,
"learning_rate": 5.5224489795918374e-05,
"loss": 1.0628,
"step": 490
},
{
"epoch": 10.0,
"learning_rate": 5.510204081632653e-05,
"loss": 1.0344,
"step": 500
},
{
"epoch": 10.0,
"eval_loss": 0.40431472659111023,
"eval_runtime": 247.681,
"eval_samples_per_second": 21.043,
"eval_steps_per_second": 1.316,
"eval_wer": 0.451111573905347,
"step": 500
},
{
"epoch": 10.2,
"learning_rate": 5.4979591836734694e-05,
"loss": 1.0725,
"step": 510
},
{
"epoch": 10.4,
"learning_rate": 5.4857142857142857e-05,
"loss": 1.0514,
"step": 520
},
{
"epoch": 10.6,
"learning_rate": 5.473469387755102e-05,
"loss": 1.0477,
"step": 530
},
{
"epoch": 10.8,
"learning_rate": 5.461224489795919e-05,
"loss": 1.0551,
"step": 540
},
{
"epoch": 11.0,
"learning_rate": 5.4489795918367346e-05,
"loss": 1.0322,
"step": 550
},
{
"epoch": 11.2,
"learning_rate": 5.436734693877551e-05,
"loss": 1.0435,
"step": 560
},
{
"epoch": 11.4,
"learning_rate": 5.424489795918367e-05,
"loss": 1.0594,
"step": 570
},
{
"epoch": 11.6,
"learning_rate": 5.412244897959184e-05,
"loss": 1.0419,
"step": 580
},
{
"epoch": 11.8,
"learning_rate": 5.4000000000000005e-05,
"loss": 1.0291,
"step": 590
},
{
"epoch": 12.0,
"learning_rate": 5.387755102040817e-05,
"loss": 1.0314,
"step": 600
},
{
"epoch": 12.2,
"learning_rate": 5.3755102040816324e-05,
"loss": 1.0598,
"step": 610
},
{
"epoch": 12.4,
"learning_rate": 5.363265306122449e-05,
"loss": 1.0412,
"step": 620
},
{
"epoch": 12.6,
"learning_rate": 5.351020408163266e-05,
"loss": 1.0483,
"step": 630
},
{
"epoch": 12.8,
"learning_rate": 5.338775510204082e-05,
"loss": 1.0448,
"step": 640
},
{
"epoch": 13.0,
"learning_rate": 5.326530612244898e-05,
"loss": 0.9938,
"step": 650
},
{
"epoch": 13.2,
"learning_rate": 5.314285714285714e-05,
"loss": 1.053,
"step": 660
},
{
"epoch": 13.4,
"learning_rate": 5.302040816326531e-05,
"loss": 1.0178,
"step": 670
},
{
"epoch": 13.6,
"learning_rate": 5.289795918367347e-05,
"loss": 1.0174,
"step": 680
},
{
"epoch": 13.8,
"learning_rate": 5.2775510204081635e-05,
"loss": 1.0231,
"step": 690
},
{
"epoch": 14.0,
"learning_rate": 5.26530612244898e-05,
"loss": 1.0033,
"step": 700
},
{
"epoch": 14.2,
"learning_rate": 5.2530612244897955e-05,
"loss": 1.0444,
"step": 710
},
{
"epoch": 14.4,
"learning_rate": 5.2408163265306124e-05,
"loss": 0.9893,
"step": 720
},
{
"epoch": 14.6,
"learning_rate": 5.228571428571429e-05,
"loss": 1.0216,
"step": 730
},
{
"epoch": 14.8,
"learning_rate": 5.216326530612245e-05,
"loss": 1.0247,
"step": 740
},
{
"epoch": 15.0,
"learning_rate": 5.2040816326530614e-05,
"loss": 1.0022,
"step": 750
},
{
"epoch": 15.2,
"learning_rate": 5.191836734693878e-05,
"loss": 1.032,
"step": 760
},
{
"epoch": 15.4,
"learning_rate": 5.179591836734694e-05,
"loss": 0.9963,
"step": 770
},
{
"epoch": 15.6,
"learning_rate": 5.16734693877551e-05,
"loss": 0.9952,
"step": 780
},
{
"epoch": 15.8,
"learning_rate": 5.1551020408163266e-05,
"loss": 1.0021,
"step": 790
},
{
"epoch": 16.0,
"learning_rate": 5.142857142857143e-05,
"loss": 1.0134,
"step": 800
},
{
"epoch": 16.2,
"learning_rate": 5.13061224489796e-05,
"loss": 1.0319,
"step": 810
},
{
"epoch": 16.4,
"learning_rate": 5.1183673469387755e-05,
"loss": 0.9963,
"step": 820
},
{
"epoch": 16.6,
"learning_rate": 5.106122448979592e-05,
"loss": 0.9966,
"step": 830
},
{
"epoch": 16.8,
"learning_rate": 5.093877551020408e-05,
"loss": 1.0023,
"step": 840
},
{
"epoch": 17.0,
"learning_rate": 5.081632653061225e-05,
"loss": 0.9739,
"step": 850
},
{
"epoch": 17.2,
"learning_rate": 5.0693877551020414e-05,
"loss": 1.0182,
"step": 860
},
{
"epoch": 17.4,
"learning_rate": 5.057142857142857e-05,
"loss": 0.9884,
"step": 870
},
{
"epoch": 17.6,
"learning_rate": 5.044897959183673e-05,
"loss": 0.9623,
"step": 880
},
{
"epoch": 17.8,
"learning_rate": 5.0326530612244896e-05,
"loss": 1.0061,
"step": 890
},
{
"epoch": 18.0,
"learning_rate": 5.0204081632653066e-05,
"loss": 0.9684,
"step": 900
},
{
"epoch": 18.2,
"learning_rate": 5.008163265306123e-05,
"loss": 1.0066,
"step": 910
},
{
"epoch": 18.4,
"learning_rate": 4.9959183673469386e-05,
"loss": 0.9779,
"step": 920
},
{
"epoch": 18.6,
"learning_rate": 4.983673469387755e-05,
"loss": 0.9784,
"step": 930
},
{
"epoch": 18.8,
"learning_rate": 4.971428571428572e-05,
"loss": 0.9869,
"step": 940
},
{
"epoch": 19.0,
"learning_rate": 4.959183673469388e-05,
"loss": 0.9561,
"step": 950
},
{
"epoch": 19.2,
"learning_rate": 4.9469387755102045e-05,
"loss": 1.0003,
"step": 960
},
{
"epoch": 19.4,
"learning_rate": 4.93469387755102e-05,
"loss": 0.9838,
"step": 970
},
{
"epoch": 19.6,
"learning_rate": 4.9224489795918364e-05,
"loss": 0.9969,
"step": 980
},
{
"epoch": 19.8,
"learning_rate": 4.9102040816326534e-05,
"loss": 1.0037,
"step": 990
},
{
"epoch": 20.0,
"learning_rate": 4.89795918367347e-05,
"loss": 0.9651,
"step": 1000
},
{
"epoch": 20.0,
"eval_loss": 0.3793290853500366,
"eval_runtime": 244.7376,
"eval_samples_per_second": 21.296,
"eval_steps_per_second": 1.332,
"eval_wer": 0.4159421044410892,
"step": 1000
},
{
"epoch": 20.2,
"learning_rate": 4.885714285714286e-05,
"loss": 1.0027,
"step": 1010
},
{
"epoch": 20.4,
"learning_rate": 4.873469387755102e-05,
"loss": 0.9598,
"step": 1020
},
{
"epoch": 20.6,
"learning_rate": 4.862448979591836e-05,
"loss": 0.979,
"step": 1030
},
{
"epoch": 20.8,
"learning_rate": 4.850204081632653e-05,
"loss": 0.9886,
"step": 1040
},
{
"epoch": 21.0,
"learning_rate": 4.8379591836734696e-05,
"loss": 0.9584,
"step": 1050
},
{
"epoch": 21.2,
"learning_rate": 4.825714285714286e-05,
"loss": 0.984,
"step": 1060
},
{
"epoch": 21.4,
"learning_rate": 4.813469387755102e-05,
"loss": 0.9586,
"step": 1070
},
{
"epoch": 21.6,
"learning_rate": 4.8012244897959185e-05,
"loss": 0.9669,
"step": 1080
},
{
"epoch": 21.8,
"learning_rate": 4.788979591836735e-05,
"loss": 0.9629,
"step": 1090
},
{
"epoch": 22.0,
"learning_rate": 4.776734693877551e-05,
"loss": 0.9468,
"step": 1100
},
{
"epoch": 22.2,
"learning_rate": 4.7644897959183674e-05,
"loss": 1.0055,
"step": 1110
},
{
"epoch": 22.4,
"learning_rate": 4.752244897959184e-05,
"loss": 0.9698,
"step": 1120
},
{
"epoch": 22.6,
"learning_rate": 4.74e-05,
"loss": 0.9557,
"step": 1130
},
{
"epoch": 22.8,
"learning_rate": 4.727755102040816e-05,
"loss": 0.9648,
"step": 1140
},
{
"epoch": 23.0,
"learning_rate": 4.7155102040816326e-05,
"loss": 0.9681,
"step": 1150
},
{
"epoch": 23.2,
"learning_rate": 4.703265306122449e-05,
"loss": 0.995,
"step": 1160
},
{
"epoch": 23.4,
"learning_rate": 4.691020408163266e-05,
"loss": 0.9415,
"step": 1170
},
{
"epoch": 23.6,
"learning_rate": 4.678775510204082e-05,
"loss": 0.955,
"step": 1180
},
{
"epoch": 23.8,
"learning_rate": 4.666530612244898e-05,
"loss": 0.9647,
"step": 1190
},
{
"epoch": 24.0,
"learning_rate": 4.654285714285714e-05,
"loss": 0.9572,
"step": 1200
},
{
"epoch": 24.2,
"learning_rate": 4.6420408163265305e-05,
"loss": 1.0043,
"step": 1210
},
{
"epoch": 24.4,
"learning_rate": 4.6297959183673474e-05,
"loss": 0.9545,
"step": 1220
},
{
"epoch": 24.6,
"learning_rate": 4.617551020408164e-05,
"loss": 0.9603,
"step": 1230
},
{
"epoch": 24.8,
"learning_rate": 4.6053061224489794e-05,
"loss": 0.9563,
"step": 1240
},
{
"epoch": 25.0,
"learning_rate": 4.593061224489796e-05,
"loss": 0.939,
"step": 1250
},
{
"epoch": 25.2,
"learning_rate": 4.580816326530613e-05,
"loss": 0.9755,
"step": 1260
},
{
"epoch": 25.4,
"learning_rate": 4.568571428571429e-05,
"loss": 0.9619,
"step": 1270
},
{
"epoch": 25.6,
"learning_rate": 4.556326530612245e-05,
"loss": 0.951,
"step": 1280
},
{
"epoch": 25.8,
"learning_rate": 4.544081632653061e-05,
"loss": 0.9442,
"step": 1290
},
{
"epoch": 26.0,
"learning_rate": 4.531836734693877e-05,
"loss": 0.9448,
"step": 1300
},
{
"epoch": 26.2,
"learning_rate": 4.519591836734694e-05,
"loss": 0.984,
"step": 1310
},
{
"epoch": 26.4,
"learning_rate": 4.5073469387755105e-05,
"loss": 0.9407,
"step": 1320
},
{
"epoch": 26.6,
"learning_rate": 4.495102040816327e-05,
"loss": 0.9506,
"step": 1330
},
{
"epoch": 26.8,
"learning_rate": 4.4828571428571424e-05,
"loss": 0.9393,
"step": 1340
},
{
"epoch": 27.0,
"learning_rate": 4.4706122448979594e-05,
"loss": 0.9513,
"step": 1350
},
{
"epoch": 27.2,
"learning_rate": 4.458367346938776e-05,
"loss": 0.9636,
"step": 1360
},
{
"epoch": 27.4,
"learning_rate": 4.446122448979592e-05,
"loss": 0.9441,
"step": 1370
},
{
"epoch": 27.6,
"learning_rate": 4.433877551020408e-05,
"loss": 0.9476,
"step": 1380
},
{
"epoch": 27.8,
"learning_rate": 4.4216326530612246e-05,
"loss": 0.9438,
"step": 1390
},
{
"epoch": 28.0,
"learning_rate": 4.409387755102041e-05,
"loss": 0.9376,
"step": 1400
},
{
"epoch": 28.2,
"learning_rate": 4.397142857142857e-05,
"loss": 0.9699,
"step": 1410
},
{
"epoch": 28.4,
"learning_rate": 4.3848979591836736e-05,
"loss": 0.9719,
"step": 1420
},
{
"epoch": 28.6,
"learning_rate": 4.37265306122449e-05,
"loss": 0.9481,
"step": 1430
},
{
"epoch": 28.8,
"learning_rate": 4.360408163265307e-05,
"loss": 0.9495,
"step": 1440
},
{
"epoch": 29.0,
"learning_rate": 4.3481632653061225e-05,
"loss": 0.9208,
"step": 1450
},
{
"epoch": 29.2,
"learning_rate": 4.335918367346939e-05,
"loss": 0.9725,
"step": 1460
},
{
"epoch": 29.4,
"learning_rate": 4.323673469387755e-05,
"loss": 0.9317,
"step": 1470
},
{
"epoch": 29.6,
"learning_rate": 4.3114285714285714e-05,
"loss": 0.9402,
"step": 1480
},
{
"epoch": 29.8,
"learning_rate": 4.2991836734693884e-05,
"loss": 0.9431,
"step": 1490
},
{
"epoch": 30.0,
"learning_rate": 4.286938775510204e-05,
"loss": 0.9125,
"step": 1500
},
{
"epoch": 30.0,
"eval_loss": 0.3755570352077484,
"eval_runtime": 246.7652,
"eval_samples_per_second": 21.121,
"eval_steps_per_second": 1.321,
"eval_wer": 0.4046441401572343,
"step": 1500
},
{
"epoch": 30.2,
"learning_rate": 4.27469387755102e-05,
"loss": 0.9531,
"step": 1510
},
{
"epoch": 30.4,
"learning_rate": 4.2624489795918366e-05,
"loss": 0.9172,
"step": 1520
},
{
"epoch": 30.6,
"learning_rate": 4.250204081632653e-05,
"loss": 0.9438,
"step": 1530
},
{
"epoch": 30.8,
"learning_rate": 4.23795918367347e-05,
"loss": 0.9405,
"step": 1540
},
{
"epoch": 31.0,
"learning_rate": 4.2257142857142855e-05,
"loss": 0.9209,
"step": 1550
},
{
"epoch": 31.2,
"learning_rate": 4.213469387755102e-05,
"loss": 0.9827,
"step": 1560
},
{
"epoch": 31.4,
"learning_rate": 4.201224489795918e-05,
"loss": 0.9163,
"step": 1570
},
{
"epoch": 31.6,
"learning_rate": 4.188979591836735e-05,
"loss": 0.9175,
"step": 1580
},
{
"epoch": 31.8,
"learning_rate": 4.1767346938775514e-05,
"loss": 0.9288,
"step": 1590
},
{
"epoch": 32.0,
"learning_rate": 4.164489795918368e-05,
"loss": 0.8951,
"step": 1600
},
{
"epoch": 32.2,
"learning_rate": 4.1522448979591834e-05,
"loss": 0.9582,
"step": 1610
},
{
"epoch": 32.4,
"learning_rate": 4.14e-05,
"loss": 0.9183,
"step": 1620
},
{
"epoch": 32.6,
"learning_rate": 4.1277551020408166e-05,
"loss": 0.9113,
"step": 1630
},
{
"epoch": 32.8,
"learning_rate": 4.115510204081633e-05,
"loss": 0.9305,
"step": 1640
},
{
"epoch": 33.0,
"learning_rate": 4.103265306122449e-05,
"loss": 0.9159,
"step": 1650
},
{
"epoch": 33.2,
"learning_rate": 4.091020408163265e-05,
"loss": 0.9706,
"step": 1660
},
{
"epoch": 33.4,
"learning_rate": 4.078775510204082e-05,
"loss": 0.9359,
"step": 1670
},
{
"epoch": 33.6,
"learning_rate": 4.066530612244898e-05,
"loss": 0.916,
"step": 1680
},
{
"epoch": 33.8,
"learning_rate": 4.0542857142857145e-05,
"loss": 0.9333,
"step": 1690
},
{
"epoch": 34.0,
"learning_rate": 4.042040816326531e-05,
"loss": 0.892,
"step": 1700
},
{
"epoch": 34.2,
"learning_rate": 4.0297959183673464e-05,
"loss": 0.9556,
"step": 1710
},
{
"epoch": 34.4,
"learning_rate": 4.0175510204081634e-05,
"loss": 0.8906,
"step": 1720
},
{
"epoch": 34.6,
"learning_rate": 4.00530612244898e-05,
"loss": 0.9183,
"step": 1730
},
{
"epoch": 34.8,
"learning_rate": 3.993061224489796e-05,
"loss": 0.9262,
"step": 1740
},
{
"epoch": 35.0,
"learning_rate": 3.980816326530612e-05,
"loss": 0.9276,
"step": 1750
},
{
"epoch": 35.2,
"learning_rate": 3.968571428571429e-05,
"loss": 0.9383,
"step": 1760
},
{
"epoch": 35.4,
"learning_rate": 3.956326530612245e-05,
"loss": 0.9247,
"step": 1770
},
{
"epoch": 35.6,
"learning_rate": 3.944081632653061e-05,
"loss": 0.9339,
"step": 1780
},
{
"epoch": 35.8,
"learning_rate": 3.9318367346938775e-05,
"loss": 0.8878,
"step": 1790
},
{
"epoch": 36.0,
"learning_rate": 3.919591836734694e-05,
"loss": 0.9035,
"step": 1800
},
{
"epoch": 36.2,
"learning_rate": 3.907346938775511e-05,
"loss": 0.9587,
"step": 1810
},
{
"epoch": 36.4,
"learning_rate": 3.8951020408163265e-05,
"loss": 0.8989,
"step": 1820
},
{
"epoch": 36.6,
"learning_rate": 3.882857142857143e-05,
"loss": 0.922,
"step": 1830
},
{
"epoch": 36.8,
"learning_rate": 3.870612244897959e-05,
"loss": 0.9278,
"step": 1840
},
{
"epoch": 37.0,
"learning_rate": 3.858367346938776e-05,
"loss": 0.8979,
"step": 1850
},
{
"epoch": 37.2,
"learning_rate": 3.8461224489795924e-05,
"loss": 0.9302,
"step": 1860
},
{
"epoch": 37.4,
"learning_rate": 3.833877551020408e-05,
"loss": 0.9078,
"step": 1870
},
{
"epoch": 37.6,
"learning_rate": 3.821632653061224e-05,
"loss": 0.919,
"step": 1880
},
{
"epoch": 37.8,
"learning_rate": 3.8093877551020406e-05,
"loss": 0.9152,
"step": 1890
},
{
"epoch": 38.0,
"learning_rate": 3.7971428571428576e-05,
"loss": 0.8914,
"step": 1900
},
{
"epoch": 38.2,
"learning_rate": 3.784897959183674e-05,
"loss": 0.9297,
"step": 1910
},
{
"epoch": 38.4,
"learning_rate": 3.7726530612244895e-05,
"loss": 0.9091,
"step": 1920
},
{
"epoch": 38.6,
"learning_rate": 3.760408163265306e-05,
"loss": 0.9085,
"step": 1930
},
{
"epoch": 38.8,
"learning_rate": 3.748163265306123e-05,
"loss": 0.9078,
"step": 1940
},
{
"epoch": 39.0,
"learning_rate": 3.735918367346939e-05,
"loss": 0.8804,
"step": 1950
},
{
"epoch": 39.2,
"learning_rate": 3.7236734693877554e-05,
"loss": 0.9208,
"step": 1960
},
{
"epoch": 39.4,
"learning_rate": 3.711428571428572e-05,
"loss": 0.8786,
"step": 1970
},
{
"epoch": 39.6,
"learning_rate": 3.6991836734693873e-05,
"loss": 0.8991,
"step": 1980
},
{
"epoch": 39.8,
"learning_rate": 3.686938775510204e-05,
"loss": 0.9153,
"step": 1990
},
{
"epoch": 40.0,
"learning_rate": 3.6746938775510206e-05,
"loss": 0.8831,
"step": 2000
},
{
"epoch": 40.0,
"eval_loss": 0.3649730086326599,
"eval_runtime": 244.6059,
"eval_samples_per_second": 21.308,
"eval_steps_per_second": 1.333,
"eval_wer": 0.3875930650283751,
"step": 2000
},
{
"epoch": 40.2,
"learning_rate": 3.662448979591837e-05,
"loss": 0.8995,
"step": 2010
},
{
"epoch": 40.4,
"learning_rate": 3.650204081632653e-05,
"loss": 0.91,
"step": 2020
},
{
"epoch": 40.6,
"learning_rate": 3.6379591836734695e-05,
"loss": 0.9105,
"step": 2030
},
{
"epoch": 40.8,
"learning_rate": 3.625714285714286e-05,
"loss": 0.897,
"step": 2040
},
{
"epoch": 41.0,
"learning_rate": 3.613469387755102e-05,
"loss": 0.8869,
"step": 2050
},
{
"epoch": 41.2,
"learning_rate": 3.6012244897959185e-05,
"loss": 0.9227,
"step": 2060
},
{
"epoch": 41.4,
"learning_rate": 3.588979591836735e-05,
"loss": 0.8919,
"step": 2070
},
{
"epoch": 41.6,
"learning_rate": 3.576734693877551e-05,
"loss": 0.8926,
"step": 2080
},
{
"epoch": 41.8,
"learning_rate": 3.5644897959183674e-05,
"loss": 0.8912,
"step": 2090
},
{
"epoch": 42.0,
"learning_rate": 3.552244897959184e-05,
"loss": 0.8841,
"step": 2100
},
{
"epoch": 42.2,
"learning_rate": 3.54e-05,
"loss": 0.9165,
"step": 2110
},
{
"epoch": 42.4,
"learning_rate": 3.527755102040817e-05,
"loss": 0.8981,
"step": 2120
},
{
"epoch": 42.6,
"learning_rate": 3.5155102040816326e-05,
"loss": 0.9079,
"step": 2130
},
{
"epoch": 42.8,
"learning_rate": 3.503265306122449e-05,
"loss": 0.9017,
"step": 2140
},
{
"epoch": 43.0,
"learning_rate": 3.491020408163265e-05,
"loss": 0.8534,
"step": 2150
},
{
"epoch": 43.2,
"learning_rate": 3.4787755102040815e-05,
"loss": 0.9108,
"step": 2160
},
{
"epoch": 43.4,
"learning_rate": 3.4665306122448985e-05,
"loss": 0.8763,
"step": 2170
},
{
"epoch": 43.6,
"learning_rate": 3.454285714285715e-05,
"loss": 0.903,
"step": 2180
},
{
"epoch": 43.8,
"learning_rate": 3.4420408163265304e-05,
"loss": 0.8842,
"step": 2190
},
{
"epoch": 44.0,
"learning_rate": 3.429795918367347e-05,
"loss": 0.8646,
"step": 2200
},
{
"epoch": 44.2,
"learning_rate": 3.417551020408163e-05,
"loss": 0.921,
"step": 2210
},
{
"epoch": 44.4,
"learning_rate": 3.40530612244898e-05,
"loss": 0.8833,
"step": 2220
},
{
"epoch": 44.6,
"learning_rate": 3.393061224489796e-05,
"loss": 0.8826,
"step": 2230
},
{
"epoch": 44.8,
"learning_rate": 3.380816326530612e-05,
"loss": 0.8892,
"step": 2240
},
{
"epoch": 45.0,
"learning_rate": 3.368571428571428e-05,
"loss": 0.8688,
"step": 2250
},
{
"epoch": 45.2,
"learning_rate": 3.356326530612245e-05,
"loss": 0.92,
"step": 2260
},
{
"epoch": 45.4,
"learning_rate": 3.3440816326530616e-05,
"loss": 0.8728,
"step": 2270
},
{
"epoch": 45.6,
"learning_rate": 3.331836734693878e-05,
"loss": 0.8921,
"step": 2280
},
{
"epoch": 45.8,
"learning_rate": 3.3195918367346935e-05,
"loss": 0.9218,
"step": 2290
},
{
"epoch": 46.0,
"learning_rate": 3.30734693877551e-05,
"loss": 0.8695,
"step": 2300
},
{
"epoch": 46.2,
"learning_rate": 3.295102040816327e-05,
"loss": 0.9205,
"step": 2310
},
{
"epoch": 46.4,
"learning_rate": 3.282857142857143e-05,
"loss": 0.8813,
"step": 2320
},
{
"epoch": 46.6,
"learning_rate": 3.2706122448979594e-05,
"loss": 0.881,
"step": 2330
},
{
"epoch": 46.8,
"learning_rate": 3.258367346938776e-05,
"loss": 0.8744,
"step": 2340
},
{
"epoch": 47.0,
"learning_rate": 3.246122448979592e-05,
"loss": 0.8419,
"step": 2350
},
{
"epoch": 47.2,
"learning_rate": 3.233877551020408e-05,
"loss": 0.8919,
"step": 2360
},
{
"epoch": 47.4,
"learning_rate": 3.2216326530612246e-05,
"loss": 0.8741,
"step": 2370
},
{
"epoch": 47.6,
"learning_rate": 3.209387755102041e-05,
"loss": 0.8904,
"step": 2380
},
{
"epoch": 47.8,
"learning_rate": 3.197142857142857e-05,
"loss": 0.8948,
"step": 2390
},
{
"epoch": 48.0,
"learning_rate": 3.1848979591836735e-05,
"loss": 0.8513,
"step": 2400
},
{
"epoch": 48.2,
"learning_rate": 3.17265306122449e-05,
"loss": 0.89,
"step": 2410
},
{
"epoch": 48.4,
"learning_rate": 3.160408163265306e-05,
"loss": 0.8818,
"step": 2420
},
{
"epoch": 48.6,
"learning_rate": 3.1481632653061224e-05,
"loss": 0.8772,
"step": 2430
},
{
"epoch": 48.8,
"learning_rate": 3.1359183673469394e-05,
"loss": 0.8851,
"step": 2440
},
{
"epoch": 49.0,
"learning_rate": 3.123673469387755e-05,
"loss": 0.8568,
"step": 2450
},
{
"epoch": 49.2,
"learning_rate": 3.1114285714285714e-05,
"loss": 0.8949,
"step": 2460
},
{
"epoch": 49.4,
"learning_rate": 3.099183673469388e-05,
"loss": 0.8748,
"step": 2470
},
{
"epoch": 49.6,
"learning_rate": 3.086938775510204e-05,
"loss": 0.861,
"step": 2480
},
{
"epoch": 49.8,
"learning_rate": 3.074693877551021e-05,
"loss": 0.8878,
"step": 2490
},
{
"epoch": 50.0,
"learning_rate": 3.0624489795918366e-05,
"loss": 0.8399,
"step": 2500
},
{
"epoch": 50.0,
"eval_loss": 0.3605365753173828,
"eval_runtime": 244.9656,
"eval_samples_per_second": 21.276,
"eval_steps_per_second": 1.331,
"eval_wer": 0.377206226896444,
"step": 2500
},
{
"epoch": 50.2,
"learning_rate": 3.0502040816326532e-05,
"loss": 0.877,
"step": 2510
},
{
"epoch": 50.4,
"learning_rate": 3.0379591836734692e-05,
"loss": 0.8843,
"step": 2520
},
{
"epoch": 50.6,
"learning_rate": 3.0257142857142862e-05,
"loss": 0.8654,
"step": 2530
},
{
"epoch": 50.8,
"learning_rate": 3.013469387755102e-05,
"loss": 0.8465,
"step": 2540
},
{
"epoch": 51.0,
"learning_rate": 3.0012244897959184e-05,
"loss": 0.8448,
"step": 2550
},
{
"epoch": 51.2,
"learning_rate": 2.9889795918367348e-05,
"loss": 0.8887,
"step": 2560
},
{
"epoch": 51.4,
"learning_rate": 2.976734693877551e-05,
"loss": 0.8518,
"step": 2570
},
{
"epoch": 51.6,
"learning_rate": 2.9644897959183674e-05,
"loss": 0.887,
"step": 2580
},
{
"epoch": 51.8,
"learning_rate": 2.952244897959184e-05,
"loss": 0.8628,
"step": 2590
},
{
"epoch": 52.0,
"learning_rate": 2.94e-05,
"loss": 0.8681,
"step": 2600
},
{
"epoch": 52.2,
"learning_rate": 2.9277551020408166e-05,
"loss": 0.8941,
"step": 2610
},
{
"epoch": 52.4,
"learning_rate": 2.9155102040816326e-05,
"loss": 0.8754,
"step": 2620
},
{
"epoch": 52.6,
"learning_rate": 2.903265306122449e-05,
"loss": 0.8573,
"step": 2630
},
{
"epoch": 52.8,
"learning_rate": 2.8910204081632655e-05,
"loss": 0.8689,
"step": 2640
},
{
"epoch": 53.0,
"learning_rate": 2.8787755102040815e-05,
"loss": 0.8664,
"step": 2650
},
{
"epoch": 53.2,
"learning_rate": 2.866530612244898e-05,
"loss": 0.8923,
"step": 2660
},
{
"epoch": 53.4,
"learning_rate": 2.854285714285714e-05,
"loss": 0.8735,
"step": 2670
},
{
"epoch": 53.6,
"learning_rate": 2.8420408163265308e-05,
"loss": 0.8717,
"step": 2680
},
{
"epoch": 53.8,
"learning_rate": 2.829795918367347e-05,
"loss": 0.8558,
"step": 2690
},
{
"epoch": 54.0,
"learning_rate": 2.8175510204081634e-05,
"loss": 0.8226,
"step": 2700
},
{
"epoch": 54.2,
"learning_rate": 2.8053061224489797e-05,
"loss": 0.8947,
"step": 2710
},
{
"epoch": 54.4,
"learning_rate": 2.793061224489796e-05,
"loss": 0.8509,
"step": 2720
},
{
"epoch": 54.6,
"learning_rate": 2.7808163265306123e-05,
"loss": 0.8551,
"step": 2730
},
{
"epoch": 54.8,
"learning_rate": 2.7685714285714286e-05,
"loss": 0.8705,
"step": 2740
},
{
"epoch": 55.0,
"learning_rate": 2.756326530612245e-05,
"loss": 0.815,
"step": 2750
},
{
"epoch": 55.2,
"learning_rate": 2.7440816326530612e-05,
"loss": 0.8939,
"step": 2760
},
{
"epoch": 55.4,
"learning_rate": 2.731836734693878e-05,
"loss": 0.859,
"step": 2770
},
{
"epoch": 55.6,
"learning_rate": 2.7195918367346938e-05,
"loss": 0.8567,
"step": 2780
},
{
"epoch": 55.8,
"learning_rate": 2.7073469387755105e-05,
"loss": 0.8599,
"step": 2790
},
{
"epoch": 56.0,
"learning_rate": 2.6951020408163268e-05,
"loss": 0.8171,
"step": 2800
},
{
"epoch": 56.2,
"learning_rate": 2.6828571428571427e-05,
"loss": 0.896,
"step": 2810
},
{
"epoch": 56.4,
"learning_rate": 2.6706122448979594e-05,
"loss": 0.8554,
"step": 2820
},
{
"epoch": 56.6,
"learning_rate": 2.6583673469387753e-05,
"loss": 0.877,
"step": 2830
},
{
"epoch": 56.8,
"learning_rate": 2.646122448979592e-05,
"loss": 0.8576,
"step": 2840
},
{
"epoch": 57.0,
"learning_rate": 2.6338775510204083e-05,
"loss": 0.8321,
"step": 2850
},
{
"epoch": 57.2,
"learning_rate": 2.6216326530612246e-05,
"loss": 0.8606,
"step": 2860
},
{
"epoch": 57.4,
"learning_rate": 2.609387755102041e-05,
"loss": 0.8409,
"step": 2870
},
{
"epoch": 57.6,
"learning_rate": 2.5971428571428575e-05,
"loss": 0.8469,
"step": 2880
},
{
"epoch": 57.8,
"learning_rate": 2.5848979591836735e-05,
"loss": 0.8546,
"step": 2890
},
{
"epoch": 58.0,
"learning_rate": 2.5726530612244898e-05,
"loss": 0.829,
"step": 2900
},
{
"epoch": 58.2,
"learning_rate": 2.560408163265306e-05,
"loss": 0.8659,
"step": 2910
},
{
"epoch": 58.4,
"learning_rate": 2.5481632653061224e-05,
"loss": 0.877,
"step": 2920
},
{
"epoch": 58.6,
"learning_rate": 2.535918367346939e-05,
"loss": 0.8537,
"step": 2930
},
{
"epoch": 58.8,
"learning_rate": 2.523673469387755e-05,
"loss": 0.8489,
"step": 2940
},
{
"epoch": 59.0,
"learning_rate": 2.5114285714285717e-05,
"loss": 0.8184,
"step": 2950
},
{
"epoch": 59.2,
"learning_rate": 2.4991836734693876e-05,
"loss": 0.8597,
"step": 2960
},
{
"epoch": 59.4,
"learning_rate": 2.486938775510204e-05,
"loss": 0.8621,
"step": 2970
},
{
"epoch": 59.6,
"learning_rate": 2.4746938775510206e-05,
"loss": 0.8553,
"step": 2980
},
{
"epoch": 59.8,
"learning_rate": 2.4624489795918366e-05,
"loss": 0.8628,
"step": 2990
},
{
"epoch": 60.0,
"learning_rate": 2.4502040816326532e-05,
"loss": 0.819,
"step": 3000
},
{
"epoch": 60.0,
"eval_loss": 0.3622128367424011,
"eval_runtime": 245.6109,
"eval_samples_per_second": 21.221,
"eval_steps_per_second": 1.327,
"eval_wer": 0.3714270838756703,
"step": 3000
},
{
"epoch": 60.2,
"learning_rate": 2.4379591836734695e-05,
"loss": 0.8644,
"step": 3010
},
{
"epoch": 60.4,
"learning_rate": 2.4257142857142858e-05,
"loss": 0.8434,
"step": 3020
},
{
"epoch": 60.6,
"learning_rate": 2.413469387755102e-05,
"loss": 0.8402,
"step": 3030
},
{
"epoch": 60.8,
"learning_rate": 2.4012244897959184e-05,
"loss": 0.8412,
"step": 3040
},
{
"epoch": 61.0,
"learning_rate": 2.3889795918367347e-05,
"loss": 0.7999,
"step": 3050
},
{
"epoch": 61.2,
"learning_rate": 2.376734693877551e-05,
"loss": 0.8662,
"step": 3060
},
{
"epoch": 61.4,
"learning_rate": 2.3644897959183673e-05,
"loss": 0.8329,
"step": 3070
},
{
"epoch": 61.6,
"learning_rate": 2.3522448979591837e-05,
"loss": 0.8458,
"step": 3080
},
{
"epoch": 61.8,
"learning_rate": 2.3400000000000003e-05,
"loss": 0.8423,
"step": 3090
},
{
"epoch": 62.0,
"learning_rate": 2.3277551020408163e-05,
"loss": 0.8163,
"step": 3100
},
{
"epoch": 62.2,
"learning_rate": 2.315510204081633e-05,
"loss": 0.8645,
"step": 3110
},
{
"epoch": 62.4,
"learning_rate": 2.303265306122449e-05,
"loss": 0.8425,
"step": 3120
},
{
"epoch": 62.6,
"learning_rate": 2.2910204081632655e-05,
"loss": 0.8474,
"step": 3130
},
{
"epoch": 62.8,
"learning_rate": 2.2787755102040818e-05,
"loss": 0.8289,
"step": 3140
},
{
"epoch": 63.0,
"learning_rate": 2.2665306122448978e-05,
"loss": 0.8156,
"step": 3150
},
{
"epoch": 63.2,
"learning_rate": 2.2542857142857144e-05,
"loss": 0.8886,
"step": 3160
},
{
"epoch": 63.4,
"learning_rate": 2.243265306122449e-05,
"loss": 0.8355,
"step": 3170
},
{
"epoch": 63.6,
"learning_rate": 2.2310204081632654e-05,
"loss": 0.8291,
"step": 3180
},
{
"epoch": 63.8,
"learning_rate": 2.2187755102040817e-05,
"loss": 0.8588,
"step": 3190
},
{
"epoch": 64.0,
"learning_rate": 2.206530612244898e-05,
"loss": 0.8194,
"step": 3200
},
{
"epoch": 64.2,
"learning_rate": 2.1942857142857143e-05,
"loss": 0.8688,
"step": 3210
},
{
"epoch": 64.4,
"learning_rate": 2.1820408163265306e-05,
"loss": 0.8319,
"step": 3220
},
{
"epoch": 64.6,
"learning_rate": 2.169795918367347e-05,
"loss": 0.8505,
"step": 3230
},
{
"epoch": 64.8,
"learning_rate": 2.1575510204081632e-05,
"loss": 0.8435,
"step": 3240
},
{
"epoch": 65.0,
"learning_rate": 2.14530612244898e-05,
"loss": 0.8047,
"step": 3250
},
{
"epoch": 65.2,
"learning_rate": 2.133061224489796e-05,
"loss": 0.8568,
"step": 3260
},
{
"epoch": 65.4,
"learning_rate": 2.1208163265306125e-05,
"loss": 0.8305,
"step": 3270
},
{
"epoch": 65.6,
"learning_rate": 2.1085714285714285e-05,
"loss": 0.8424,
"step": 3280
},
{
"epoch": 65.8,
"learning_rate": 2.096326530612245e-05,
"loss": 0.8319,
"step": 3290
},
{
"epoch": 66.0,
"learning_rate": 2.0840816326530614e-05,
"loss": 0.823,
"step": 3300
},
{
"epoch": 66.2,
"learning_rate": 2.0718367346938774e-05,
"loss": 0.8754,
"step": 3310
},
{
"epoch": 66.4,
"learning_rate": 2.059591836734694e-05,
"loss": 0.8253,
"step": 3320
},
{
"epoch": 66.6,
"learning_rate": 2.04734693877551e-05,
"loss": 0.8429,
"step": 3330
},
{
"epoch": 66.8,
"learning_rate": 2.0351020408163266e-05,
"loss": 0.8286,
"step": 3340
},
{
"epoch": 67.0,
"learning_rate": 2.022857142857143e-05,
"loss": 0.8149,
"step": 3350
},
{
"epoch": 67.2,
"learning_rate": 2.0106122448979593e-05,
"loss": 0.8683,
"step": 3360
},
{
"epoch": 67.4,
"learning_rate": 1.9983673469387756e-05,
"loss": 0.8284,
"step": 3370
},
{
"epoch": 67.6,
"learning_rate": 1.986122448979592e-05,
"loss": 0.8363,
"step": 3380
},
{
"epoch": 67.8,
"learning_rate": 1.973877551020408e-05,
"loss": 0.8364,
"step": 3390
},
{
"epoch": 68.0,
"learning_rate": 1.9616326530612245e-05,
"loss": 0.8126,
"step": 3400
},
{
"epoch": 68.2,
"learning_rate": 1.9493877551020408e-05,
"loss": 0.8599,
"step": 3410
},
{
"epoch": 68.4,
"learning_rate": 1.937142857142857e-05,
"loss": 0.8182,
"step": 3420
},
{
"epoch": 68.6,
"learning_rate": 1.9248979591836737e-05,
"loss": 0.8344,
"step": 3430
},
{
"epoch": 68.8,
"learning_rate": 1.9126530612244897e-05,
"loss": 0.8158,
"step": 3440
},
{
"epoch": 69.0,
"learning_rate": 1.9004081632653063e-05,
"loss": 0.8117,
"step": 3450
},
{
"epoch": 69.2,
"learning_rate": 1.8881632653061226e-05,
"loss": 0.8421,
"step": 3460
},
{
"epoch": 69.4,
"learning_rate": 1.8759183673469386e-05,
"loss": 0.8264,
"step": 3470
},
{
"epoch": 69.6,
"learning_rate": 1.8636734693877553e-05,
"loss": 0.8356,
"step": 3480
},
{
"epoch": 69.8,
"learning_rate": 1.8514285714285712e-05,
"loss": 0.8316,
"step": 3490
},
{
"epoch": 70.0,
"learning_rate": 1.839183673469388e-05,
"loss": 0.8029,
"step": 3500
},
{
"epoch": 70.0,
"eval_loss": 0.3561089038848877,
"eval_runtime": 245.8818,
"eval_samples_per_second": 21.197,
"eval_steps_per_second": 1.326,
"eval_wer": 0.3663768417764357,
"step": 3500
},
{
"epoch": 70.2,
"learning_rate": 1.8269387755102042e-05,
"loss": 0.8579,
"step": 3510
},
{
"epoch": 70.4,
"learning_rate": 1.8146938775510205e-05,
"loss": 0.8511,
"step": 3520
},
{
"epoch": 70.6,
"learning_rate": 1.8024489795918368e-05,
"loss": 0.8437,
"step": 3530
},
{
"epoch": 70.8,
"learning_rate": 1.790204081632653e-05,
"loss": 0.8526,
"step": 3540
},
{
"epoch": 71.0,
"learning_rate": 1.7779591836734694e-05,
"loss": 0.8014,
"step": 3550
},
{
"epoch": 71.2,
"learning_rate": 1.7657142857142857e-05,
"loss": 0.868,
"step": 3560
},
{
"epoch": 71.4,
"learning_rate": 1.753469387755102e-05,
"loss": 0.8531,
"step": 3570
},
{
"epoch": 71.6,
"learning_rate": 1.7412244897959183e-05,
"loss": 0.8555,
"step": 3580
},
{
"epoch": 71.8,
"learning_rate": 1.728979591836735e-05,
"loss": 0.8413,
"step": 3590
},
{
"epoch": 72.0,
"learning_rate": 1.716734693877551e-05,
"loss": 0.8216,
"step": 3600
},
{
"epoch": 72.2,
"learning_rate": 1.7044897959183676e-05,
"loss": 0.8611,
"step": 3610
},
{
"epoch": 72.4,
"learning_rate": 1.6922448979591835e-05,
"loss": 0.8393,
"step": 3620
},
{
"epoch": 72.6,
"learning_rate": 1.6800000000000002e-05,
"loss": 0.8229,
"step": 3630
},
{
"epoch": 72.8,
"learning_rate": 1.6677551020408165e-05,
"loss": 0.8375,
"step": 3640
},
{
"epoch": 73.0,
"learning_rate": 1.6555102040816325e-05,
"loss": 0.8119,
"step": 3650
},
{
"epoch": 73.2,
"learning_rate": 1.643265306122449e-05,
"loss": 0.8706,
"step": 3660
},
{
"epoch": 73.4,
"learning_rate": 1.6310204081632654e-05,
"loss": 0.829,
"step": 3670
},
{
"epoch": 73.6,
"learning_rate": 1.6187755102040817e-05,
"loss": 0.8246,
"step": 3680
},
{
"epoch": 73.8,
"learning_rate": 1.606530612244898e-05,
"loss": 0.8462,
"step": 3690
},
{
"epoch": 74.0,
"learning_rate": 1.5942857142857143e-05,
"loss": 0.8305,
"step": 3700
},
{
"epoch": 74.2,
"learning_rate": 1.5820408163265306e-05,
"loss": 0.8568,
"step": 3710
},
{
"epoch": 74.4,
"learning_rate": 1.569795918367347e-05,
"loss": 0.8283,
"step": 3720
},
{
"epoch": 74.6,
"learning_rate": 1.5575510204081632e-05,
"loss": 0.8205,
"step": 3730
},
{
"epoch": 74.8,
"learning_rate": 1.5453061224489795e-05,
"loss": 0.8301,
"step": 3740
},
{
"epoch": 75.0,
"learning_rate": 1.5330612244897962e-05,
"loss": 0.8168,
"step": 3750
},
{
"epoch": 75.2,
"learning_rate": 1.5208163265306121e-05,
"loss": 0.8503,
"step": 3760
},
{
"epoch": 75.4,
"learning_rate": 1.5085714285714288e-05,
"loss": 0.825,
"step": 3770
},
{
"epoch": 75.6,
"learning_rate": 1.496326530612245e-05,
"loss": 0.8255,
"step": 3780
},
{
"epoch": 75.8,
"learning_rate": 1.4840816326530612e-05,
"loss": 0.827,
"step": 3790
},
{
"epoch": 76.0,
"learning_rate": 1.4718367346938775e-05,
"loss": 0.7891,
"step": 3800
},
{
"epoch": 76.2,
"learning_rate": 1.4595918367346938e-05,
"loss": 0.8504,
"step": 3810
},
{
"epoch": 76.4,
"learning_rate": 1.4473469387755103e-05,
"loss": 0.8318,
"step": 3820
},
{
"epoch": 76.6,
"learning_rate": 1.4351020408163266e-05,
"loss": 0.8401,
"step": 3830
},
{
"epoch": 76.8,
"learning_rate": 1.422857142857143e-05,
"loss": 0.8284,
"step": 3840
},
{
"epoch": 77.0,
"learning_rate": 1.410612244897959e-05,
"loss": 0.801,
"step": 3850
},
{
"epoch": 77.2,
"learning_rate": 1.3983673469387755e-05,
"loss": 0.8628,
"step": 3860
},
{
"epoch": 77.4,
"learning_rate": 1.3861224489795918e-05,
"loss": 0.8339,
"step": 3870
},
{
"epoch": 77.6,
"learning_rate": 1.3738775510204082e-05,
"loss": 0.817,
"step": 3880
},
{
"epoch": 77.8,
"learning_rate": 1.3616326530612245e-05,
"loss": 0.8374,
"step": 3890
},
{
"epoch": 78.0,
"learning_rate": 1.349387755102041e-05,
"loss": 0.7855,
"step": 3900
},
{
"epoch": 78.2,
"learning_rate": 1.3371428571428572e-05,
"loss": 0.8433,
"step": 3910
},
{
"epoch": 78.4,
"learning_rate": 1.3248979591836735e-05,
"loss": 0.821,
"step": 3920
},
{
"epoch": 78.6,
"learning_rate": 1.3126530612244897e-05,
"loss": 0.8174,
"step": 3930
},
{
"epoch": 78.8,
"learning_rate": 1.3004081632653062e-05,
"loss": 0.8496,
"step": 3940
},
{
"epoch": 79.0,
"learning_rate": 1.2881632653061225e-05,
"loss": 0.7955,
"step": 3950
},
{
"epoch": 79.2,
"learning_rate": 1.2759183673469388e-05,
"loss": 0.8538,
"step": 3960
},
{
"epoch": 79.4,
"learning_rate": 1.263673469387755e-05,
"loss": 0.8431,
"step": 3970
},
{
"epoch": 79.6,
"learning_rate": 1.2514285714285715e-05,
"loss": 0.84,
"step": 3980
},
{
"epoch": 79.8,
"learning_rate": 1.2391836734693879e-05,
"loss": 0.8186,
"step": 3990
},
{
"epoch": 80.0,
"learning_rate": 1.2269387755102042e-05,
"loss": 0.8045,
"step": 4000
},
{
"epoch": 80.0,
"eval_loss": 0.3606957793235779,
"eval_runtime": 246.7059,
"eval_samples_per_second": 21.126,
"eval_steps_per_second": 1.321,
"eval_wer": 0.3621335971260478,
"step": 4000
},
{
"epoch": 80.2,
"learning_rate": 1.2146938775510205e-05,
"loss": 0.8382,
"step": 4010
},
{
"epoch": 80.4,
"learning_rate": 1.2024489795918368e-05,
"loss": 0.8248,
"step": 4020
},
{
"epoch": 80.6,
"learning_rate": 1.190204081632653e-05,
"loss": 0.8309,
"step": 4030
},
{
"epoch": 80.8,
"learning_rate": 1.1779591836734694e-05,
"loss": 0.8149,
"step": 4040
},
{
"epoch": 81.0,
"learning_rate": 1.1657142857142857e-05,
"loss": 0.806,
"step": 4050
},
{
"epoch": 81.2,
"learning_rate": 1.1534693877551022e-05,
"loss": 0.8475,
"step": 4060
},
{
"epoch": 81.4,
"learning_rate": 1.1412244897959185e-05,
"loss": 0.8478,
"step": 4070
},
{
"epoch": 81.6,
"learning_rate": 1.1289795918367348e-05,
"loss": 0.813,
"step": 4080
},
{
"epoch": 81.8,
"learning_rate": 1.116734693877551e-05,
"loss": 0.8231,
"step": 4090
},
{
"epoch": 82.0,
"learning_rate": 1.1044897959183672e-05,
"loss": 0.8105,
"step": 4100
},
{
"epoch": 82.2,
"learning_rate": 1.0922448979591837e-05,
"loss": 0.8554,
"step": 4110
},
{
"epoch": 82.4,
"learning_rate": 1.08e-05,
"loss": 0.8084,
"step": 4120
},
{
"epoch": 82.6,
"learning_rate": 1.0677551020408163e-05,
"loss": 0.803,
"step": 4130
},
{
"epoch": 82.8,
"learning_rate": 1.0555102040816326e-05,
"loss": 0.833,
"step": 4140
},
{
"epoch": 83.0,
"learning_rate": 1.043265306122449e-05,
"loss": 0.781,
"step": 4150
},
{
"epoch": 83.2,
"learning_rate": 1.0310204081632654e-05,
"loss": 0.8371,
"step": 4160
},
{
"epoch": 83.4,
"learning_rate": 1.0187755102040817e-05,
"loss": 0.8149,
"step": 4170
},
{
"epoch": 83.6,
"learning_rate": 1.006530612244898e-05,
"loss": 0.8383,
"step": 4180
},
{
"epoch": 83.8,
"learning_rate": 9.942857142857143e-06,
"loss": 0.8085,
"step": 4190
},
{
"epoch": 84.0,
"learning_rate": 9.820408163265306e-06,
"loss": 0.7775,
"step": 4200
},
{
"epoch": 84.2,
"learning_rate": 9.697959183673469e-06,
"loss": 0.8459,
"step": 4210
},
{
"epoch": 84.4,
"learning_rate": 9.575510204081632e-06,
"loss": 0.7982,
"step": 4220
},
{
"epoch": 84.6,
"learning_rate": 9.453061224489797e-06,
"loss": 0.8121,
"step": 4230
},
{
"epoch": 84.8,
"learning_rate": 9.33061224489796e-06,
"loss": 0.848,
"step": 4240
},
{
"epoch": 85.0,
"learning_rate": 9.208163265306123e-06,
"loss": 0.7948,
"step": 4250
},
{
"epoch": 85.2,
"learning_rate": 9.085714285714286e-06,
"loss": 0.8709,
"step": 4260
},
{
"epoch": 85.4,
"learning_rate": 8.963265306122449e-06,
"loss": 0.837,
"step": 4270
},
{
"epoch": 85.6,
"learning_rate": 8.840816326530612e-06,
"loss": 0.8078,
"step": 4280
},
{
"epoch": 85.8,
"learning_rate": 8.718367346938775e-06,
"loss": 0.7987,
"step": 4290
},
{
"epoch": 86.0,
"learning_rate": 8.595918367346938e-06,
"loss": 0.8008,
"step": 4300
},
{
"epoch": 86.2,
"learning_rate": 8.473469387755103e-06,
"loss": 0.8481,
"step": 4310
},
{
"epoch": 86.4,
"learning_rate": 8.351020408163266e-06,
"loss": 0.8025,
"step": 4320
},
{
"epoch": 86.6,
"learning_rate": 8.22857142857143e-06,
"loss": 0.8173,
"step": 4330
},
{
"epoch": 86.8,
"learning_rate": 8.106122448979592e-06,
"loss": 0.8162,
"step": 4340
},
{
"epoch": 87.0,
"learning_rate": 7.983673469387755e-06,
"loss": 0.7663,
"step": 4350
},
{
"epoch": 87.2,
"learning_rate": 7.861224489795918e-06,
"loss": 0.8386,
"step": 4360
},
{
"epoch": 87.4,
"learning_rate": 7.738775510204081e-06,
"loss": 0.845,
"step": 4370
},
{
"epoch": 87.6,
"learning_rate": 7.6163265306122444e-06,
"loss": 0.8114,
"step": 4380
},
{
"epoch": 87.8,
"learning_rate": 7.493877551020408e-06,
"loss": 0.8129,
"step": 4390
},
{
"epoch": 88.0,
"learning_rate": 7.371428571428571e-06,
"loss": 0.7757,
"step": 4400
},
{
"epoch": 88.2,
"learning_rate": 7.248979591836735e-06,
"loss": 0.847,
"step": 4410
},
{
"epoch": 88.4,
"learning_rate": 7.1265306122448975e-06,
"loss": 0.8315,
"step": 4420
},
{
"epoch": 88.6,
"learning_rate": 7.004081632653061e-06,
"loss": 0.8118,
"step": 4430
},
{
"epoch": 88.8,
"learning_rate": 6.8816326530612245e-06,
"loss": 0.8117,
"step": 4440
},
{
"epoch": 89.0,
"learning_rate": 6.759183673469388e-06,
"loss": 0.7848,
"step": 4450
},
{
"epoch": 89.2,
"learning_rate": 6.6367346938775506e-06,
"loss": 0.8463,
"step": 4460
},
{
"epoch": 89.4,
"learning_rate": 6.5142857142857145e-06,
"loss": 0.8147,
"step": 4470
},
{
"epoch": 89.6,
"learning_rate": 6.3918367346938775e-06,
"loss": 0.8026,
"step": 4480
},
{
"epoch": 89.8,
"learning_rate": 6.2693877551020414e-06,
"loss": 0.8148,
"step": 4490
},
{
"epoch": 90.0,
"learning_rate": 6.146938775510204e-06,
"loss": 0.7799,
"step": 4500
},
{
"epoch": 90.0,
"eval_loss": 0.3501129448413849,
"eval_runtime": 245.2219,
"eval_samples_per_second": 21.254,
"eval_steps_per_second": 1.329,
"eval_wer": 0.35609413234758164,
"step": 4500
},
{
"epoch": 90.2,
"learning_rate": 6.0244897959183675e-06,
"loss": 0.8452,
"step": 4510
},
{
"epoch": 90.4,
"learning_rate": 5.902040816326531e-06,
"loss": 0.8101,
"step": 4520
},
{
"epoch": 90.6,
"learning_rate": 5.7795918367346945e-06,
"loss": 0.8223,
"step": 4530
},
{
"epoch": 90.8,
"learning_rate": 5.6571428571428576e-06,
"loss": 0.813,
"step": 4540
},
{
"epoch": 91.0,
"learning_rate": 5.534693877551021e-06,
"loss": 0.7841,
"step": 4550
},
{
"epoch": 91.2,
"learning_rate": 5.412244897959184e-06,
"loss": 0.8426,
"step": 4560
},
{
"epoch": 91.4,
"learning_rate": 5.2897959183673476e-06,
"loss": 0.8095,
"step": 4570
},
{
"epoch": 91.6,
"learning_rate": 5.167346938775511e-06,
"loss": 0.8121,
"step": 4580
},
{
"epoch": 91.8,
"learning_rate": 5.044897959183674e-06,
"loss": 0.8114,
"step": 4590
},
{
"epoch": 92.0,
"learning_rate": 4.922448979591837e-06,
"loss": 0.7884,
"step": 4600
},
{
"epoch": 92.2,
"learning_rate": 4.800000000000001e-06,
"loss": 0.8417,
"step": 4610
},
{
"epoch": 92.4,
"learning_rate": 4.677551020408164e-06,
"loss": 0.8047,
"step": 4620
},
{
"epoch": 92.6,
"learning_rate": 4.555102040816326e-06,
"loss": 0.8168,
"step": 4630
},
{
"epoch": 92.8,
"learning_rate": 4.43265306122449e-06,
"loss": 0.8138,
"step": 4640
},
{
"epoch": 93.0,
"learning_rate": 4.310204081632653e-06,
"loss": 0.7879,
"step": 4650
},
{
"epoch": 93.2,
"learning_rate": 4.187755102040817e-06,
"loss": 0.8256,
"step": 4660
},
{
"epoch": 93.4,
"learning_rate": 4.065306122448979e-06,
"loss": 0.8187,
"step": 4670
},
{
"epoch": 93.6,
"learning_rate": 3.942857142857143e-06,
"loss": 0.8089,
"step": 4680
},
{
"epoch": 93.8,
"learning_rate": 3.820408163265306e-06,
"loss": 0.8059,
"step": 4690
},
{
"epoch": 94.0,
"learning_rate": 3.6979591836734694e-06,
"loss": 0.7852,
"step": 4700
},
{
"epoch": 94.2,
"learning_rate": 3.575510204081633e-06,
"loss": 0.846,
"step": 4710
},
{
"epoch": 94.4,
"learning_rate": 3.4530612244897963e-06,
"loss": 0.8134,
"step": 4720
},
{
"epoch": 94.6,
"learning_rate": 3.3306122448979594e-06,
"loss": 0.7982,
"step": 4730
},
{
"epoch": 94.8,
"learning_rate": 3.208163265306123e-06,
"loss": 0.8181,
"step": 4740
},
{
"epoch": 95.0,
"learning_rate": 3.085714285714286e-06,
"loss": 0.7856,
"step": 4750
},
{
"epoch": 95.2,
"learning_rate": 2.963265306122449e-06,
"loss": 0.8306,
"step": 4760
},
{
"epoch": 95.4,
"learning_rate": 2.840816326530612e-06,
"loss": 0.8227,
"step": 4770
},
{
"epoch": 95.6,
"learning_rate": 2.7183673469387755e-06,
"loss": 0.8109,
"step": 4780
},
{
"epoch": 95.8,
"learning_rate": 2.5959183673469386e-06,
"loss": 0.787,
"step": 4790
},
{
"epoch": 96.0,
"learning_rate": 2.473469387755102e-06,
"loss": 0.7832,
"step": 4800
},
{
"epoch": 96.2,
"learning_rate": 2.351020408163265e-06,
"loss": 0.8228,
"step": 4810
},
{
"epoch": 96.4,
"learning_rate": 2.2285714285714286e-06,
"loss": 0.8168,
"step": 4820
},
{
"epoch": 96.6,
"learning_rate": 2.1061224489795916e-06,
"loss": 0.8122,
"step": 4830
},
{
"epoch": 96.8,
"learning_rate": 1.983673469387755e-06,
"loss": 0.7942,
"step": 4840
},
{
"epoch": 97.0,
"learning_rate": 1.8612244897959184e-06,
"loss": 0.7948,
"step": 4850
},
{
"epoch": 97.2,
"learning_rate": 1.7387755102040817e-06,
"loss": 0.8036,
"step": 4860
},
{
"epoch": 97.4,
"learning_rate": 1.616326530612245e-06,
"loss": 0.815,
"step": 4870
},
{
"epoch": 97.6,
"learning_rate": 1.4938775510204082e-06,
"loss": 0.8111,
"step": 4880
},
{
"epoch": 97.8,
"learning_rate": 1.3714285714285715e-06,
"loss": 0.813,
"step": 4890
},
{
"epoch": 98.0,
"learning_rate": 1.2489795918367347e-06,
"loss": 0.7897,
"step": 4900
},
{
"epoch": 98.2,
"learning_rate": 1.126530612244898e-06,
"loss": 0.8249,
"step": 4910
},
{
"epoch": 98.4,
"learning_rate": 1.0040816326530613e-06,
"loss": 0.802,
"step": 4920
},
{
"epoch": 98.6,
"learning_rate": 8.816326530612244e-07,
"loss": 0.7912,
"step": 4930
},
{
"epoch": 98.8,
"learning_rate": 7.591836734693878e-07,
"loss": 0.8246,
"step": 4940
},
{
"epoch": 99.0,
"learning_rate": 6.367346938775511e-07,
"loss": 0.774,
"step": 4950
},
{
"epoch": 99.2,
"learning_rate": 5.142857142857143e-07,
"loss": 0.831,
"step": 4960
},
{
"epoch": 99.4,
"learning_rate": 3.918367346938776e-07,
"loss": 0.8066,
"step": 4970
},
{
"epoch": 99.6,
"learning_rate": 2.693877551020408e-07,
"loss": 0.8148,
"step": 4980
},
{
"epoch": 99.8,
"learning_rate": 1.4693877551020407e-07,
"loss": 0.8274,
"step": 4990
},
{
"epoch": 100.0,
"learning_rate": 2.448979591836735e-08,
"loss": 0.7769,
"step": 5000
},
{
"epoch": 100.0,
"eval_loss": 0.3502330183982849,
"eval_runtime": 245.5235,
"eval_samples_per_second": 21.228,
"eval_steps_per_second": 1.328,
"eval_wer": 0.35502681314104234,
"step": 5000
},
{
"epoch": 100.0,
"step": 5000,
"total_flos": 1.6395774173071445e+20,
"train_loss": 0.9013227667808533,
"train_runtime": 60806.5864,
"train_samples_per_second": 21.06,
"train_steps_per_second": 0.082
}
],
"max_steps": 5000,
"num_train_epochs": 100,
"total_flos": 1.6395774173071445e+20,
"trial_name": null,
"trial_params": null
}