{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.020080321285141, "eval_steps": 200, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10040160642570281, "eval_loss": 0.3807084858417511, "eval_runtime": 206.6442, "eval_samples_per_second": 26.04, "eval_steps_per_second": 0.411, "eval_wer": 0.2514466384298753, "step": 200 }, { "epoch": 0.20080321285140562, "eval_loss": 0.253967821598053, "eval_runtime": 206.4967, "eval_samples_per_second": 26.059, "eval_steps_per_second": 0.412, "eval_wer": 0.2642923899858816, "step": 400 }, { "epoch": 0.25100401606425704, "grad_norm": 1.857898235321045, "learning_rate": 0.0001491, "loss": 2.4874, "step": 500 }, { "epoch": 0.30120481927710846, "eval_loss": 0.2642447352409363, "eval_runtime": 205.9594, "eval_samples_per_second": 26.127, "eval_steps_per_second": 0.413, "eval_wer": 0.30376424267732505, "step": 600 }, { "epoch": 0.40160642570281124, "eval_loss": 0.3125462234020233, "eval_runtime": 205.1387, "eval_samples_per_second": 26.231, "eval_steps_per_second": 0.414, "eval_wer": 0.39048300821253157, "step": 800 }, { "epoch": 0.5020080321285141, "grad_norm": 3.634869337081909, "learning_rate": 0.00029909999999999995, "loss": 0.3991, "step": 1000 }, { "epoch": 0.5020080321285141, "eval_loss": 0.3531426191329956, "eval_runtime": 205.1455, "eval_samples_per_second": 26.23, "eval_steps_per_second": 0.414, "eval_wer": 0.3939430094056354, "step": 1000 }, { "epoch": 0.6024096385542169, "eval_loss": 0.3571958839893341, "eval_runtime": 209.3856, "eval_samples_per_second": 25.699, "eval_steps_per_second": 0.406, "eval_wer": 0.40390542663405515, "step": 1200 }, { "epoch": 0.7028112449799196, "eval_loss": 0.36791086196899414, "eval_runtime": 206.8164, "eval_samples_per_second": 26.018, "eval_steps_per_second": 0.411, "eval_wer": 0.4052576110083716, "step": 1400 }, { "epoch": 0.7530120481927711, "grad_norm": 3.5483558177948, "learning_rate": 0.0002834333333333333, "loss": 0.4512, "step": 1500 }, { "epoch": 0.8032128514056225, "eval_loss": 0.35897189378738403, "eval_runtime": 207.7252, "eval_samples_per_second": 25.904, "eval_steps_per_second": 0.409, "eval_wer": 0.38767921414225776, "step": 1600 }, { "epoch": 0.9036144578313253, "eval_loss": 0.3732704222202301, "eval_runtime": 205.6494, "eval_samples_per_second": 26.166, "eval_steps_per_second": 0.413, "eval_wer": 0.4006840462128895, "step": 1800 }, { "epoch": 1.0040160642570282, "grad_norm": 2.2905380725860596, "learning_rate": 0.00026676666666666663, "loss": 0.4333, "step": 2000 }, { "epoch": 1.0040160642570282, "eval_loss": 0.377088725566864, "eval_runtime": 204.613, "eval_samples_per_second": 26.298, "eval_steps_per_second": 0.415, "eval_wer": 0.4243273876990992, "step": 2000 }, { "epoch": 1.104417670682731, "eval_loss": 0.3604430556297302, "eval_runtime": 208.0965, "eval_samples_per_second": 25.858, "eval_steps_per_second": 0.408, "eval_wer": 0.3867048459901768, "step": 2200 }, { "epoch": 1.2048192771084336, "eval_loss": 0.3431110978126526, "eval_runtime": 206.2637, "eval_samples_per_second": 26.088, "eval_steps_per_second": 0.412, "eval_wer": 0.38137564875022373, "step": 2400 }, { "epoch": 1.2550200803212852, "grad_norm": 1.8699342012405396, "learning_rate": 0.00025009999999999995, "loss": 0.3468, "step": 2500 }, { "epoch": 1.3052208835341366, "eval_loss": 0.32902058959007263, "eval_runtime": 205.4777, "eval_samples_per_second": 26.188, "eval_steps_per_second": 0.414, "eval_wer": 0.3778559923641353, "step": 2600 }, { "epoch": 1.4056224899598393, "eval_loss": 0.33407700061798096, "eval_runtime": 205.6359, "eval_samples_per_second": 26.168, "eval_steps_per_second": 0.413, "eval_wer": 0.3647119648432063, "step": 2800 }, { "epoch": 1.5060240963855422, "grad_norm": 2.062389373779297, "learning_rate": 0.0002334333333333333, "loss": 0.3503, "step": 3000 }, { "epoch": 1.5060240963855422, "eval_loss": 0.3247535228729248, "eval_runtime": 206.3116, "eval_samples_per_second": 26.082, "eval_steps_per_second": 0.412, "eval_wer": 0.3614706993577124, "step": 3000 }, { "epoch": 1.606425702811245, "eval_loss": 0.33116209506988525, "eval_runtime": 203.9912, "eval_samples_per_second": 26.379, "eval_steps_per_second": 0.417, "eval_wer": 0.35512736383702204, "step": 3200 }, { "epoch": 1.7068273092369477, "eval_loss": 0.3410908281803131, "eval_runtime": 204.5054, "eval_samples_per_second": 26.312, "eval_steps_per_second": 0.416, "eval_wer": 0.3836226610193084, "step": 3400 }, { "epoch": 1.7570281124497993, "grad_norm": 0.9907544255256653, "learning_rate": 0.00021679999999999998, "loss": 0.3418, "step": 3500 }, { "epoch": 1.8072289156626506, "eval_loss": 0.3116574287414551, "eval_runtime": 205.0392, "eval_samples_per_second": 26.244, "eval_steps_per_second": 0.415, "eval_wer": 0.33752908190658, "step": 3600 }, { "epoch": 1.9076305220883534, "eval_loss": 0.3196774423122406, "eval_runtime": 206.2716, "eval_samples_per_second": 26.087, "eval_steps_per_second": 0.412, "eval_wer": 0.34317644017578397, "step": 3800 }, { "epoch": 2.0080321285140563, "grad_norm": 1.0384626388549805, "learning_rate": 0.0002001333333333333, "loss": 0.3181, "step": 4000 }, { "epoch": 2.0080321285140563, "eval_loss": 0.30675315856933594, "eval_runtime": 206.0737, "eval_samples_per_second": 26.112, "eval_steps_per_second": 0.412, "eval_wer": 0.3339696553918352, "step": 4000 }, { "epoch": 2.108433734939759, "eval_loss": 0.31376445293426514, "eval_runtime": 209.2791, "eval_samples_per_second": 25.712, "eval_steps_per_second": 0.406, "eval_wer": 0.3358388514386844, "step": 4200 }, { "epoch": 2.208835341365462, "eval_loss": 0.31388720870018005, "eval_runtime": 204.9118, "eval_samples_per_second": 26.26, "eval_steps_per_second": 0.415, "eval_wer": 0.3333731034619897, "step": 4400 }, { "epoch": 2.2590361445783134, "grad_norm": 0.5868389010429382, "learning_rate": 0.00018346666666666664, "loss": 0.2423, "step": 4500 }, { "epoch": 2.3092369477911645, "eval_loss": 0.3191888928413391, "eval_runtime": 204.834, "eval_samples_per_second": 26.27, "eval_steps_per_second": 0.415, "eval_wer": 0.32848137763725666, "step": 4600 }, { "epoch": 2.4096385542168672, "eval_loss": 0.2928995192050934, "eval_runtime": 204.43, "eval_samples_per_second": 26.322, "eval_steps_per_second": 0.416, "eval_wer": 0.31682872994094136, "step": 4800 }, { "epoch": 2.5100401606425704, "grad_norm": 1.3247759342193604, "learning_rate": 0.0001668, "loss": 0.2327, "step": 5000 }, { "epoch": 2.5100401606425704, "eval_loss": 0.29208171367645264, "eval_runtime": 206.3612, "eval_samples_per_second": 26.076, "eval_steps_per_second": 0.412, "eval_wer": 0.3103064288412973, "step": 5000 }, { "epoch": 2.610441767068273, "eval_loss": 0.2801830470561981, "eval_runtime": 204.3678, "eval_samples_per_second": 26.33, "eval_steps_per_second": 0.416, "eval_wer": 0.3037443576129969, "step": 5200 }, { "epoch": 2.710843373493976, "eval_loss": 0.2811721861362457, "eval_runtime": 204.4403, "eval_samples_per_second": 26.321, "eval_steps_per_second": 0.416, "eval_wer": 0.29624768836127185, "step": 5400 }, { "epoch": 2.7610441767068274, "grad_norm": 1.381541132926941, "learning_rate": 0.00015013333333333331, "loss": 0.2374, "step": 5500 }, { "epoch": 2.8112449799196786, "eval_loss": 0.28872984647750854, "eval_runtime": 204.3069, "eval_samples_per_second": 26.338, "eval_steps_per_second": 0.416, "eval_wer": 0.30422159915687325, "step": 5600 }, { "epoch": 2.9116465863453813, "eval_loss": 0.27397701144218445, "eval_runtime": 204.1464, "eval_samples_per_second": 26.359, "eval_steps_per_second": 0.416, "eval_wer": 0.2927081469108553, "step": 5800 }, { "epoch": 3.0120481927710845, "grad_norm": 1.4617916345596313, "learning_rate": 0.00013346666666666667, "loss": 0.2136, "step": 6000 }, { "epoch": 3.0120481927710845, "eval_loss": 0.2662462592124939, "eval_runtime": 203.8941, "eval_samples_per_second": 26.391, "eval_steps_per_second": 0.417, "eval_wer": 0.28296446539004555, "step": 6000 }, { "epoch": 3.112449799196787, "eval_loss": 0.28285130858421326, "eval_runtime": 206.1704, "eval_samples_per_second": 26.1, "eval_steps_per_second": 0.412, "eval_wer": 0.2890294100101414, "step": 6200 }, { "epoch": 3.21285140562249, "eval_loss": 0.2729070484638214, "eval_runtime": 206.2438, "eval_samples_per_second": 26.09, "eval_steps_per_second": 0.412, "eval_wer": 0.28692159319135396, "step": 6400 }, { "epoch": 3.2630522088353415, "grad_norm": 0.8870707750320435, "learning_rate": 0.00011679999999999998, "loss": 0.167, "step": 6500 }, { "epoch": 3.3132530120481927, "eval_loss": 0.2776893675327301, "eval_runtime": 204.2022, "eval_samples_per_second": 26.351, "eval_steps_per_second": 0.416, "eval_wer": 0.28892998468850045, "step": 6600 }, { "epoch": 3.4136546184738954, "eval_loss": 0.2711654603481293, "eval_runtime": 203.7376, "eval_samples_per_second": 26.411, "eval_steps_per_second": 0.417, "eval_wer": 0.28095607389289906, "step": 6800 }, { "epoch": 3.5140562248995986, "grad_norm": 1.0165985822677612, "learning_rate": 0.00010013333333333333, "loss": 0.1614, "step": 7000 }, { "epoch": 3.5140562248995986, "eval_loss": 0.2688385844230652, "eval_runtime": 204.1623, "eval_samples_per_second": 26.356, "eval_steps_per_second": 0.416, "eval_wer": 0.27091411640716656, "step": 7000 }, { "epoch": 3.6144578313253013, "eval_loss": 0.2589295208454132, "eval_runtime": 205.2749, "eval_samples_per_second": 26.214, "eval_steps_per_second": 0.414, "eval_wer": 0.26626101135437175, "step": 7200 }, { "epoch": 3.714859437751004, "eval_loss": 0.26514673233032227, "eval_runtime": 204.2135, "eval_samples_per_second": 26.35, "eval_steps_per_second": 0.416, "eval_wer": 0.2669768736701863, "step": 7400 }, { "epoch": 3.765060240963855, "grad_norm": 0.7397546172142029, "learning_rate": 8.346666666666666e-05, "loss": 0.1529, "step": 7500 }, { "epoch": 3.8152610441767068, "eval_loss": 0.25074735283851624, "eval_runtime": 204.2336, "eval_samples_per_second": 26.347, "eval_steps_per_second": 0.416, "eval_wer": 0.2637157231203643, "step": 7600 }, { "epoch": 3.9156626506024095, "eval_loss": 0.2493942528963089, "eval_runtime": 206.072, "eval_samples_per_second": 26.112, "eval_steps_per_second": 0.412, "eval_wer": 0.2567957207341566, "step": 7800 }, { "epoch": 4.016064257028113, "grad_norm": 0.785851776599884, "learning_rate": 6.68e-05, "loss": 0.1496, "step": 8000 }, { "epoch": 4.016064257028113, "eval_loss": 0.25821030139923096, "eval_runtime": 204.9558, "eval_samples_per_second": 26.254, "eval_steps_per_second": 0.415, "eval_wer": 0.2580484797868321, "step": 8000 }, { "epoch": 4.116465863453815, "eval_loss": 0.2650238871574402, "eval_runtime": 204.7852, "eval_samples_per_second": 26.276, "eval_steps_per_second": 0.415, "eval_wer": 0.25753146811429933, "step": 8200 }, { "epoch": 4.216867469879518, "eval_loss": 0.26561084389686584, "eval_runtime": 210.1403, "eval_samples_per_second": 25.607, "eval_steps_per_second": 0.404, "eval_wer": 0.25598043309670104, "step": 8400 }, { "epoch": 4.267068273092369, "grad_norm": 0.34119465947151184, "learning_rate": 5.013333333333332e-05, "loss": 0.1128, "step": 8500 }, { "epoch": 4.317269076305221, "eval_loss": 0.25430822372436523, "eval_runtime": 203.852, "eval_samples_per_second": 26.397, "eval_steps_per_second": 0.417, "eval_wer": 0.25118813259360895, "step": 8600 }, { "epoch": 4.417670682730924, "eval_loss": 0.2586837112903595, "eval_runtime": 202.7677, "eval_samples_per_second": 26.538, "eval_steps_per_second": 0.419, "eval_wer": 0.24987571834794886, "step": 8800 }, { "epoch": 4.518072289156627, "grad_norm": 0.5558347105979919, "learning_rate": 3.346666666666666e-05, "loss": 0.1109, "step": 9000 }, { "epoch": 4.518072289156627, "eval_loss": 0.2540307939052582, "eval_runtime": 202.8954, "eval_samples_per_second": 26.521, "eval_steps_per_second": 0.419, "eval_wer": 0.24599813080395316, "step": 9000 }, { "epoch": 4.618473895582329, "eval_loss": 0.2546459436416626, "eval_runtime": 208.4343, "eval_samples_per_second": 25.816, "eval_steps_per_second": 0.408, "eval_wer": 0.24245858935353656, "step": 9200 }, { "epoch": 4.718875502008032, "eval_loss": 0.25800344347953796, "eval_runtime": 205.7304, "eval_samples_per_second": 26.156, "eval_steps_per_second": 0.413, "eval_wer": 0.24198134780966016, "step": 9400 }, { "epoch": 4.769076305220883, "grad_norm": 0.9226210117340088, "learning_rate": 1.68e-05, "loss": 0.1028, "step": 9500 }, { "epoch": 4.8192771084337345, "eval_loss": 0.25135332345962524, "eval_runtime": 203.1799, "eval_samples_per_second": 26.484, "eval_steps_per_second": 0.418, "eval_wer": 0.24035077253474915, "step": 9600 }, { "epoch": 4.919678714859438, "eval_loss": 0.2509777843952179, "eval_runtime": 203.2596, "eval_samples_per_second": 26.474, "eval_steps_per_second": 0.418, "eval_wer": 0.24025134721310823, "step": 9800 }, { "epoch": 5.020080321285141, "grad_norm": 4.224822521209717, "learning_rate": 1.3333333333333334e-07, "loss": 0.1069, "step": 10000 }, { "epoch": 5.020080321285141, "eval_loss": 0.2515573501586914, "eval_runtime": 209.1262, "eval_samples_per_second": 25.731, "eval_steps_per_second": 0.406, "eval_wer": 0.24033088747042097, "step": 10000 }, { "epoch": 5.020080321285141, "step": 10000, "total_flos": 6.356146932571761e+18, "train_loss": 0.3559208065032959, "train_runtime": 12994.9936, "train_samples_per_second": 3.078, "train_steps_per_second": 0.77 } ], "logging_steps": 500, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.356146932571761e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }