|
{ |
|
"best_metric": 0.35516515374183655, |
|
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-large-2024_09_17-batch-size64_epochs100_freeze/checkpoint-13394", |
|
"epoch": 84.0, |
|
"eval_steps": 500, |
|
"global_step": 15204, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_explained_variance": 0.28046968350043666, |
|
"eval_loss": 0.38582414388656616, |
|
"eval_mae": 0.15708860754966736, |
|
"eval_mse": 0.04635250195860863, |
|
"eval_r2": 0.26238919671070565, |
|
"eval_rmse": 0.21529631316661835, |
|
"eval_runtime": 68.5532, |
|
"eval_samples_per_second": 56.073, |
|
"eval_steps_per_second": 0.89, |
|
"learning_rate": 0.001, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_explained_variance": 0.32091750548436093, |
|
"eval_loss": 0.37635815143585205, |
|
"eval_mae": 0.1467229723930359, |
|
"eval_mse": 0.04398971050977707, |
|
"eval_r2": 0.3120521114085856, |
|
"eval_rmse": 0.20973724126815796, |
|
"eval_runtime": 65.3616, |
|
"eval_samples_per_second": 58.811, |
|
"eval_steps_per_second": 0.933, |
|
"learning_rate": 0.001, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.7624309392265194, |
|
"grad_norm": 0.29469817876815796, |
|
"learning_rate": 0.001, |
|
"loss": 0.4473, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_explained_variance": 0.3393913645010728, |
|
"eval_loss": 0.3715941309928894, |
|
"eval_mae": 0.1450481116771698, |
|
"eval_mse": 0.04250793904066086, |
|
"eval_r2": 0.33185449725151883, |
|
"eval_rmse": 0.20617453753948212, |
|
"eval_runtime": 65.4448, |
|
"eval_samples_per_second": 58.737, |
|
"eval_steps_per_second": 0.932, |
|
"learning_rate": 0.001, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_explained_variance": 0.35663692767803484, |
|
"eval_loss": 0.3672849237918854, |
|
"eval_mae": 0.1395464986562729, |
|
"eval_mse": 0.0409623458981514, |
|
"eval_r2": 0.35477505001012255, |
|
"eval_rmse": 0.20239156484603882, |
|
"eval_runtime": 65.8223, |
|
"eval_samples_per_second": 58.4, |
|
"eval_steps_per_second": 0.927, |
|
"learning_rate": 0.001, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_explained_variance": 0.3493932577279898, |
|
"eval_loss": 0.3692065477371216, |
|
"eval_mae": 0.1393202394247055, |
|
"eval_mse": 0.041857048869132996, |
|
"eval_r2": 0.3425061497286567, |
|
"eval_rmse": 0.20458994805812836, |
|
"eval_runtime": 66.3389, |
|
"eval_samples_per_second": 57.945, |
|
"eval_steps_per_second": 0.92, |
|
"learning_rate": 0.001, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 5.524861878453039, |
|
"grad_norm": 0.19042304158210754, |
|
"learning_rate": 0.001, |
|
"loss": 0.3892, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_explained_variance": 0.35904277287996733, |
|
"eval_loss": 0.3672534227371216, |
|
"eval_mae": 0.14119164645671844, |
|
"eval_mse": 0.040877003222703934, |
|
"eval_r2": 0.3553590945142445, |
|
"eval_rmse": 0.2021806240081787, |
|
"eval_runtime": 65.5836, |
|
"eval_samples_per_second": 58.612, |
|
"eval_steps_per_second": 0.93, |
|
"learning_rate": 0.001, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_explained_variance": 0.34988729311869693, |
|
"eval_loss": 0.3680865168571472, |
|
"eval_mae": 0.14079739153385162, |
|
"eval_mse": 0.04153257608413696, |
|
"eval_r2": 0.3456613343062778, |
|
"eval_rmse": 0.2037954330444336, |
|
"eval_runtime": 64.4017, |
|
"eval_samples_per_second": 59.688, |
|
"eval_steps_per_second": 0.947, |
|
"learning_rate": 0.001, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_explained_variance": 0.36423414945602417, |
|
"eval_loss": 0.365603506565094, |
|
"eval_mae": 0.13892073929309845, |
|
"eval_mse": 0.04058730974793434, |
|
"eval_r2": 0.35962535995096967, |
|
"eval_rmse": 0.20146292448043823, |
|
"eval_runtime": 64.831, |
|
"eval_samples_per_second": 59.293, |
|
"eval_steps_per_second": 0.941, |
|
"learning_rate": 0.001, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 8.287292817679559, |
|
"grad_norm": 0.1760077178478241, |
|
"learning_rate": 0.001, |
|
"loss": 0.3855, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_explained_variance": 0.3612723258825449, |
|
"eval_loss": 0.36585840582847595, |
|
"eval_mae": 0.13438531756401062, |
|
"eval_mse": 0.04076695442199707, |
|
"eval_r2": 0.3554776353070419, |
|
"eval_rmse": 0.20190827548503876, |
|
"eval_runtime": 64.4706, |
|
"eval_samples_per_second": 59.624, |
|
"eval_steps_per_second": 0.946, |
|
"learning_rate": 0.001, |
|
"step": 1629 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_explained_variance": 0.3561701728747441, |
|
"eval_loss": 0.366574227809906, |
|
"eval_mae": 0.13837845623493195, |
|
"eval_mse": 0.04093795642256737, |
|
"eval_r2": 0.3533183127533612, |
|
"eval_rmse": 0.2023313045501709, |
|
"eval_runtime": 63.2978, |
|
"eval_samples_per_second": 60.729, |
|
"eval_steps_per_second": 0.964, |
|
"learning_rate": 0.001, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_explained_variance": 0.3574172487625709, |
|
"eval_loss": 0.36660775542259216, |
|
"eval_mae": 0.13663478195667267, |
|
"eval_mse": 0.04090488329529762, |
|
"eval_r2": 0.35496352056496683, |
|
"eval_rmse": 0.20224955677986145, |
|
"eval_runtime": 66.2827, |
|
"eval_samples_per_second": 57.994, |
|
"eval_steps_per_second": 0.92, |
|
"learning_rate": 0.001, |
|
"step": 1991 |
|
}, |
|
{ |
|
"epoch": 11.049723756906078, |
|
"grad_norm": 0.14891982078552246, |
|
"learning_rate": 0.001, |
|
"loss": 0.3816, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_explained_variance": 0.3598099580177894, |
|
"eval_loss": 0.36626219749450684, |
|
"eval_mae": 0.13958622515201569, |
|
"eval_mse": 0.04085636883974075, |
|
"eval_r2": 0.35871773520484396, |
|
"eval_rmse": 0.20212958753108978, |
|
"eval_runtime": 64.7339, |
|
"eval_samples_per_second": 59.382, |
|
"eval_steps_per_second": 0.942, |
|
"learning_rate": 0.001, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_explained_variance": 0.37047534722548264, |
|
"eval_loss": 0.3631901741027832, |
|
"eval_mae": 0.1360856592655182, |
|
"eval_mse": 0.03979066386818886, |
|
"eval_r2": 0.3696611807758026, |
|
"eval_rmse": 0.1994759738445282, |
|
"eval_runtime": 65.3689, |
|
"eval_samples_per_second": 58.805, |
|
"eval_steps_per_second": 0.933, |
|
"learning_rate": 0.001, |
|
"step": 2353 |
|
}, |
|
{ |
|
"epoch": 13.812154696132596, |
|
"grad_norm": 0.14235170185565948, |
|
"learning_rate": 0.001, |
|
"loss": 0.381, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_explained_variance": 0.36284926304450404, |
|
"eval_loss": 0.36694806814193726, |
|
"eval_mae": 0.14229656755924225, |
|
"eval_mse": 0.04098258540034294, |
|
"eval_r2": 0.356153731540797, |
|
"eval_rmse": 0.20244155824184418, |
|
"eval_runtime": 64.126, |
|
"eval_samples_per_second": 59.945, |
|
"eval_steps_per_second": 0.951, |
|
"learning_rate": 0.001, |
|
"step": 2534 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_explained_variance": 0.36449302159822905, |
|
"eval_loss": 0.3644973933696747, |
|
"eval_mae": 0.1395292580127716, |
|
"eval_mse": 0.04036581516265869, |
|
"eval_r2": 0.36203359510531696, |
|
"eval_rmse": 0.2009124606847763, |
|
"eval_runtime": 64.0305, |
|
"eval_samples_per_second": 60.034, |
|
"eval_steps_per_second": 0.953, |
|
"learning_rate": 0.001, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_explained_variance": 0.37152041838719296, |
|
"eval_loss": 0.36393943428993225, |
|
"eval_mae": 0.13569381833076477, |
|
"eval_mse": 0.039987124502658844, |
|
"eval_r2": 0.36948082804864185, |
|
"eval_rmse": 0.19996780157089233, |
|
"eval_runtime": 63.9139, |
|
"eval_samples_per_second": 60.143, |
|
"eval_steps_per_second": 0.954, |
|
"learning_rate": 0.001, |
|
"step": 2896 |
|
}, |
|
{ |
|
"epoch": 16.574585635359117, |
|
"grad_norm": 0.13048891723155975, |
|
"learning_rate": 0.001, |
|
"loss": 0.3811, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_explained_variance": 0.37284482900912946, |
|
"eval_loss": 0.36665406823158264, |
|
"eval_mae": 0.14128881692886353, |
|
"eval_mse": 0.04064851254224777, |
|
"eval_r2": 0.3621847777710853, |
|
"eval_rmse": 0.20161476731300354, |
|
"eval_runtime": 66.0408, |
|
"eval_samples_per_second": 58.206, |
|
"eval_steps_per_second": 0.924, |
|
"learning_rate": 0.001, |
|
"step": 3077 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_explained_variance": 0.3705295782822829, |
|
"eval_loss": 0.36318618059158325, |
|
"eval_mae": 0.13683417439460754, |
|
"eval_mse": 0.03981361910700798, |
|
"eval_r2": 0.369508628045091, |
|
"eval_rmse": 0.19953350722789764, |
|
"eval_runtime": 63.7575, |
|
"eval_samples_per_second": 60.291, |
|
"eval_steps_per_second": 0.957, |
|
"learning_rate": 0.001, |
|
"step": 3258 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_explained_variance": 0.3733598177249615, |
|
"eval_loss": 0.36302879452705383, |
|
"eval_mae": 0.13539017736911774, |
|
"eval_mse": 0.03974781930446625, |
|
"eval_r2": 0.3718927441003872, |
|
"eval_rmse": 0.19936855137348175, |
|
"eval_runtime": 63.4414, |
|
"eval_samples_per_second": 60.591, |
|
"eval_steps_per_second": 0.962, |
|
"learning_rate": 0.001, |
|
"step": 3439 |
|
}, |
|
{ |
|
"epoch": 19.337016574585636, |
|
"grad_norm": 0.13633792102336884, |
|
"learning_rate": 0.001, |
|
"loss": 0.3792, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_explained_variance": 0.3622324833503136, |
|
"eval_loss": 0.36489424109458923, |
|
"eval_mae": 0.13486731052398682, |
|
"eval_mse": 0.04052112251520157, |
|
"eval_r2": 0.35869592759647334, |
|
"eval_rmse": 0.20129859447479248, |
|
"eval_runtime": 64.295, |
|
"eval_samples_per_second": 59.787, |
|
"eval_steps_per_second": 0.949, |
|
"learning_rate": 0.001, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_explained_variance": 0.3630923858055702, |
|
"eval_loss": 0.3665030300617218, |
|
"eval_mae": 0.13610774278640747, |
|
"eval_mse": 0.040700096637010574, |
|
"eval_r2": 0.3584834523421166, |
|
"eval_rmse": 0.20174264907836914, |
|
"eval_runtime": 64.1739, |
|
"eval_samples_per_second": 59.9, |
|
"eval_steps_per_second": 0.951, |
|
"learning_rate": 0.001, |
|
"step": 3801 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_explained_variance": 0.3704591485170218, |
|
"eval_loss": 0.3647814095020294, |
|
"eval_mae": 0.1368531733751297, |
|
"eval_mse": 0.03999844938516617, |
|
"eval_r2": 0.3677615209740873, |
|
"eval_rmse": 0.19999612867832184, |
|
"eval_runtime": 63.5961, |
|
"eval_samples_per_second": 60.444, |
|
"eval_steps_per_second": 0.959, |
|
"learning_rate": 0.001, |
|
"step": 3982 |
|
}, |
|
{ |
|
"epoch": 22.099447513812155, |
|
"grad_norm": 0.1797100454568863, |
|
"learning_rate": 0.001, |
|
"loss": 0.3808, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_explained_variance": 0.3736427976534917, |
|
"eval_loss": 0.3633384704589844, |
|
"eval_mae": 0.1356455683708191, |
|
"eval_mse": 0.039849139750003815, |
|
"eval_r2": 0.37049292256309013, |
|
"eval_rmse": 0.1996224969625473, |
|
"eval_runtime": 63.5905, |
|
"eval_samples_per_second": 60.449, |
|
"eval_steps_per_second": 0.959, |
|
"learning_rate": 0.001, |
|
"step": 4163 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_explained_variance": 0.3761314291220445, |
|
"eval_loss": 0.3632254898548126, |
|
"eval_mae": 0.13934393227100372, |
|
"eval_mse": 0.03965350612998009, |
|
"eval_r2": 0.3725149173190659, |
|
"eval_rmse": 0.19913187623023987, |
|
"eval_runtime": 63.5074, |
|
"eval_samples_per_second": 60.528, |
|
"eval_steps_per_second": 0.961, |
|
"learning_rate": 0.001, |
|
"step": 4344 |
|
}, |
|
{ |
|
"epoch": 24.861878453038674, |
|
"grad_norm": 0.10225138068199158, |
|
"learning_rate": 0.001, |
|
"loss": 0.3796, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_explained_variance": 0.37342833555661714, |
|
"eval_loss": 0.3638208210468292, |
|
"eval_mae": 0.13812901079654694, |
|
"eval_mse": 0.03988226130604744, |
|
"eval_r2": 0.3698107462777432, |
|
"eval_rmse": 0.19970543682575226, |
|
"eval_runtime": 64.082, |
|
"eval_samples_per_second": 59.986, |
|
"eval_steps_per_second": 0.952, |
|
"learning_rate": 0.001, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_explained_variance": 0.38356072627581084, |
|
"eval_loss": 0.3607248365879059, |
|
"eval_mae": 0.132920041680336, |
|
"eval_mse": 0.03901772201061249, |
|
"eval_r2": 0.3818014601715421, |
|
"eval_rmse": 0.19752904772758484, |
|
"eval_runtime": 63.8273, |
|
"eval_samples_per_second": 60.225, |
|
"eval_steps_per_second": 0.956, |
|
"learning_rate": 0.0001, |
|
"step": 4706 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_explained_variance": 0.3877932016666119, |
|
"eval_loss": 0.3599555194377899, |
|
"eval_mae": 0.13530299067497253, |
|
"eval_mse": 0.038680098950862885, |
|
"eval_r2": 0.3862897971569748, |
|
"eval_rmse": 0.19667257368564606, |
|
"eval_runtime": 63.6171, |
|
"eval_samples_per_second": 60.424, |
|
"eval_steps_per_second": 0.959, |
|
"learning_rate": 0.0001, |
|
"step": 4887 |
|
}, |
|
{ |
|
"epoch": 27.624309392265193, |
|
"grad_norm": 0.09920254349708557, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3765, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_explained_variance": 0.39040088195067185, |
|
"eval_loss": 0.35923057794570923, |
|
"eval_mae": 0.13371111452579498, |
|
"eval_mse": 0.038444750010967255, |
|
"eval_r2": 0.3893828319749203, |
|
"eval_rmse": 0.19607332348823547, |
|
"eval_runtime": 63.6463, |
|
"eval_samples_per_second": 60.396, |
|
"eval_steps_per_second": 0.958, |
|
"learning_rate": 0.0001, |
|
"step": 5068 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_explained_variance": 0.39147963432165295, |
|
"eval_loss": 0.3595493733882904, |
|
"eval_mae": 0.13497120141983032, |
|
"eval_mse": 0.03846590965986252, |
|
"eval_r2": 0.3891551349793923, |
|
"eval_rmse": 0.1961272805929184, |
|
"eval_runtime": 63.7787, |
|
"eval_samples_per_second": 60.271, |
|
"eval_steps_per_second": 0.956, |
|
"learning_rate": 0.0001, |
|
"step": 5249 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_explained_variance": 0.3893452011621915, |
|
"eval_loss": 0.35978832840919495, |
|
"eval_mae": 0.13498304784297943, |
|
"eval_mse": 0.03862994909286499, |
|
"eval_r2": 0.3876274861623127, |
|
"eval_rmse": 0.19654503464698792, |
|
"eval_runtime": 63.918, |
|
"eval_samples_per_second": 60.14, |
|
"eval_steps_per_second": 0.954, |
|
"learning_rate": 0.0001, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 30.386740331491712, |
|
"grad_norm": 0.09680859744548798, |
|
"learning_rate": 0.0001, |
|
"loss": 0.373, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_explained_variance": 0.39206390655957735, |
|
"eval_loss": 0.35871124267578125, |
|
"eval_mae": 0.131711944937706, |
|
"eval_mse": 0.03838532418012619, |
|
"eval_r2": 0.39069464683386806, |
|
"eval_rmse": 0.19592173397541046, |
|
"eval_runtime": 63.6009, |
|
"eval_samples_per_second": 60.439, |
|
"eval_steps_per_second": 0.959, |
|
"learning_rate": 0.0001, |
|
"step": 5611 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_explained_variance": 0.39324428943487316, |
|
"eval_loss": 0.35840144753456116, |
|
"eval_mae": 0.13263028860092163, |
|
"eval_mse": 0.0382704883813858, |
|
"eval_r2": 0.39277553504116497, |
|
"eval_rmse": 0.19562844932079315, |
|
"eval_runtime": 63.4174, |
|
"eval_samples_per_second": 60.614, |
|
"eval_steps_per_second": 0.962, |
|
"learning_rate": 0.0001, |
|
"step": 5792 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_explained_variance": 0.3953018326025743, |
|
"eval_loss": 0.35809990763664246, |
|
"eval_mae": 0.13110357522964478, |
|
"eval_mse": 0.03812328726053238, |
|
"eval_r2": 0.39453575095056653, |
|
"eval_rmse": 0.19525185227394104, |
|
"eval_runtime": 62.9848, |
|
"eval_samples_per_second": 61.031, |
|
"eval_steps_per_second": 0.968, |
|
"learning_rate": 0.0001, |
|
"step": 5973 |
|
}, |
|
{ |
|
"epoch": 33.149171270718234, |
|
"grad_norm": 0.10557221621274948, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3735, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_explained_variance": 0.3966822119859549, |
|
"eval_loss": 0.3580343723297119, |
|
"eval_mae": 0.13232208788394928, |
|
"eval_mse": 0.038077060133218765, |
|
"eval_r2": 0.3953078482977419, |
|
"eval_rmse": 0.19513344764709473, |
|
"eval_runtime": 63.9448, |
|
"eval_samples_per_second": 60.114, |
|
"eval_steps_per_second": 0.954, |
|
"learning_rate": 0.0001, |
|
"step": 6154 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_explained_variance": 0.39542460441589355, |
|
"eval_loss": 0.3578670918941498, |
|
"eval_mae": 0.13223391771316528, |
|
"eval_mse": 0.038055673241615295, |
|
"eval_r2": 0.3949423136793632, |
|
"eval_rmse": 0.19507862627506256, |
|
"eval_runtime": 62.8884, |
|
"eval_samples_per_second": 61.124, |
|
"eval_steps_per_second": 0.97, |
|
"learning_rate": 0.0001, |
|
"step": 6335 |
|
}, |
|
{ |
|
"epoch": 35.91160220994475, |
|
"grad_norm": 0.11413700878620148, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3711, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_explained_variance": 0.38986305548594546, |
|
"eval_loss": 0.35921958088874817, |
|
"eval_mae": 0.13451573252677917, |
|
"eval_mse": 0.0385238379240036, |
|
"eval_r2": 0.3895210802446932, |
|
"eval_rmse": 0.19627490639686584, |
|
"eval_runtime": 63.9244, |
|
"eval_samples_per_second": 60.134, |
|
"eval_steps_per_second": 0.954, |
|
"learning_rate": 0.0001, |
|
"step": 6516 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_explained_variance": 0.39700071628277117, |
|
"eval_loss": 0.35754600167274475, |
|
"eval_mae": 0.13133254647254944, |
|
"eval_mse": 0.037971220910549164, |
|
"eval_r2": 0.3965857587136563, |
|
"eval_rmse": 0.19486205279827118, |
|
"eval_runtime": 63.3201, |
|
"eval_samples_per_second": 60.707, |
|
"eval_steps_per_second": 0.963, |
|
"learning_rate": 0.0001, |
|
"step": 6697 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_explained_variance": 0.39355502220300526, |
|
"eval_loss": 0.35816583037376404, |
|
"eval_mae": 0.13258841633796692, |
|
"eval_mse": 0.038258858025074005, |
|
"eval_r2": 0.39226546341596713, |
|
"eval_rmse": 0.19559872150421143, |
|
"eval_runtime": 62.6934, |
|
"eval_samples_per_second": 61.314, |
|
"eval_steps_per_second": 0.973, |
|
"learning_rate": 0.0001, |
|
"step": 6878 |
|
}, |
|
{ |
|
"epoch": 38.67403314917127, |
|
"grad_norm": 0.147694930434227, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3705, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_explained_variance": 0.3965281844139099, |
|
"eval_loss": 0.3575587570667267, |
|
"eval_mae": 0.1313440054655075, |
|
"eval_mse": 0.03796360641717911, |
|
"eval_r2": 0.39630358388937376, |
|
"eval_rmse": 0.19484251737594604, |
|
"eval_runtime": 62.5891, |
|
"eval_samples_per_second": 61.416, |
|
"eval_steps_per_second": 0.975, |
|
"learning_rate": 0.0001, |
|
"step": 7059 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_explained_variance": 0.399988224873176, |
|
"eval_loss": 0.3574675917625427, |
|
"eval_mae": 0.13325949013233185, |
|
"eval_mse": 0.03790339455008507, |
|
"eval_r2": 0.3980004685467563, |
|
"eval_rmse": 0.19468794763088226, |
|
"eval_runtime": 63.1438, |
|
"eval_samples_per_second": 60.877, |
|
"eval_steps_per_second": 0.966, |
|
"learning_rate": 0.0001, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_explained_variance": 0.39883482914704543, |
|
"eval_loss": 0.35797080397605896, |
|
"eval_mae": 0.13172872364521027, |
|
"eval_mse": 0.03810995817184448, |
|
"eval_r2": 0.3955525420135218, |
|
"eval_rmse": 0.19521771371364594, |
|
"eval_runtime": 63.8253, |
|
"eval_samples_per_second": 60.227, |
|
"eval_steps_per_second": 0.956, |
|
"learning_rate": 0.0001, |
|
"step": 7421 |
|
}, |
|
{ |
|
"epoch": 41.43646408839779, |
|
"grad_norm": 0.13456250727176666, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3704, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_explained_variance": 0.39858559003243077, |
|
"eval_loss": 0.3574862778186798, |
|
"eval_mae": 0.13303333520889282, |
|
"eval_mse": 0.03798728436231613, |
|
"eval_r2": 0.39695276377811434, |
|
"eval_rmse": 0.19490326941013336, |
|
"eval_runtime": 67.394, |
|
"eval_samples_per_second": 57.038, |
|
"eval_steps_per_second": 0.905, |
|
"learning_rate": 0.0001, |
|
"step": 7602 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_explained_variance": 0.40196093229147106, |
|
"eval_loss": 0.3568632900714874, |
|
"eval_mae": 0.13252291083335876, |
|
"eval_mse": 0.03772151470184326, |
|
"eval_r2": 0.4008098201061217, |
|
"eval_rmse": 0.19422027468681335, |
|
"eval_runtime": 64.6291, |
|
"eval_samples_per_second": 59.478, |
|
"eval_steps_per_second": 0.944, |
|
"learning_rate": 0.0001, |
|
"step": 7783 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_explained_variance": 0.4026290269998404, |
|
"eval_loss": 0.35680440068244934, |
|
"eval_mae": 0.13054220378398895, |
|
"eval_mse": 0.03770707920193672, |
|
"eval_r2": 0.4009435040202465, |
|
"eval_rmse": 0.1941831111907959, |
|
"eval_runtime": 64.3612, |
|
"eval_samples_per_second": 59.725, |
|
"eval_steps_per_second": 0.948, |
|
"learning_rate": 0.0001, |
|
"step": 7964 |
|
}, |
|
{ |
|
"epoch": 44.19889502762431, |
|
"grad_norm": 0.12347038835287094, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3695, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_explained_variance": 0.40327414182516247, |
|
"eval_loss": 0.35672253370285034, |
|
"eval_mae": 0.13190330564975739, |
|
"eval_mse": 0.03762032091617584, |
|
"eval_r2": 0.40209036636711937, |
|
"eval_rmse": 0.193959578871727, |
|
"eval_runtime": 63.5564, |
|
"eval_samples_per_second": 60.482, |
|
"eval_steps_per_second": 0.96, |
|
"learning_rate": 0.0001, |
|
"step": 8145 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_explained_variance": 0.4014772314291734, |
|
"eval_loss": 0.35691043734550476, |
|
"eval_mae": 0.1298011690378189, |
|
"eval_mse": 0.03774061053991318, |
|
"eval_r2": 0.39979262898816803, |
|
"eval_rmse": 0.19426943361759186, |
|
"eval_runtime": 63.5835, |
|
"eval_samples_per_second": 60.456, |
|
"eval_steps_per_second": 0.959, |
|
"learning_rate": 0.0001, |
|
"step": 8326 |
|
}, |
|
{ |
|
"epoch": 46.96132596685083, |
|
"grad_norm": 0.1476801335811615, |
|
"learning_rate": 0.0001, |
|
"loss": 0.369, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_explained_variance": 0.39959606299033534, |
|
"eval_loss": 0.3573501706123352, |
|
"eval_mae": 0.12922033667564392, |
|
"eval_mse": 0.03795965388417244, |
|
"eval_r2": 0.39734844502667516, |
|
"eval_rmse": 0.19483236968517303, |
|
"eval_runtime": 64.1983, |
|
"eval_samples_per_second": 59.877, |
|
"eval_steps_per_second": 0.95, |
|
"learning_rate": 0.0001, |
|
"step": 8507 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_explained_variance": 0.404104429941911, |
|
"eval_loss": 0.35634738206863403, |
|
"eval_mae": 0.13015295565128326, |
|
"eval_mse": 0.03764864429831505, |
|
"eval_r2": 0.4019054071784941, |
|
"eval_rmse": 0.19403257966041565, |
|
"eval_runtime": 63.8043, |
|
"eval_samples_per_second": 60.247, |
|
"eval_steps_per_second": 0.956, |
|
"learning_rate": 0.0001, |
|
"step": 8688 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_explained_variance": 0.4024105530518752, |
|
"eval_loss": 0.3566192090511322, |
|
"eval_mae": 0.1305515021085739, |
|
"eval_mse": 0.03765449672937393, |
|
"eval_r2": 0.40112185894390806, |
|
"eval_rmse": 0.19404765963554382, |
|
"eval_runtime": 65.5486, |
|
"eval_samples_per_second": 58.644, |
|
"eval_steps_per_second": 0.931, |
|
"learning_rate": 0.0001, |
|
"step": 8869 |
|
}, |
|
{ |
|
"epoch": 49.72375690607735, |
|
"grad_norm": 0.17585940659046173, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3691, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_explained_variance": 0.40147255475704485, |
|
"eval_loss": 0.3571104109287262, |
|
"eval_mae": 0.13218748569488525, |
|
"eval_mse": 0.0377979539334774, |
|
"eval_r2": 0.39978904676068683, |
|
"eval_rmse": 0.19441695511341095, |
|
"eval_runtime": 64.6683, |
|
"eval_samples_per_second": 59.442, |
|
"eval_steps_per_second": 0.943, |
|
"learning_rate": 0.0001, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_explained_variance": 0.4020539063673753, |
|
"eval_loss": 0.3584417402744293, |
|
"eval_mae": 0.13350461423397064, |
|
"eval_mse": 0.03811892494559288, |
|
"eval_r2": 0.39583579837070054, |
|
"eval_rmse": 0.19524069130420685, |
|
"eval_runtime": 64.7621, |
|
"eval_samples_per_second": 59.356, |
|
"eval_steps_per_second": 0.942, |
|
"learning_rate": 0.0001, |
|
"step": 9231 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_explained_variance": 0.4045378336539635, |
|
"eval_loss": 0.3561328649520874, |
|
"eval_mae": 0.1308905929327011, |
|
"eval_mse": 0.03748491033911705, |
|
"eval_r2": 0.4042346756357482, |
|
"eval_rmse": 0.19361020624637604, |
|
"eval_runtime": 64.77, |
|
"eval_samples_per_second": 59.349, |
|
"eval_steps_per_second": 0.942, |
|
"learning_rate": 0.0001, |
|
"step": 9412 |
|
}, |
|
{ |
|
"epoch": 52.48618784530387, |
|
"grad_norm": 0.15689648687839508, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3677, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_explained_variance": 0.4053275997822101, |
|
"eval_loss": 0.35652926564216614, |
|
"eval_mae": 0.13147617876529694, |
|
"eval_mse": 0.03759394586086273, |
|
"eval_r2": 0.4026062075156075, |
|
"eval_rmse": 0.19389158487319946, |
|
"eval_runtime": 64.8021, |
|
"eval_samples_per_second": 59.319, |
|
"eval_steps_per_second": 0.941, |
|
"learning_rate": 0.0001, |
|
"step": 9593 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_explained_variance": 0.401798074062054, |
|
"eval_loss": 0.3567388355731964, |
|
"eval_mae": 0.13164331018924713, |
|
"eval_mse": 0.03773793205618858, |
|
"eval_r2": 0.40105556644385676, |
|
"eval_rmse": 0.1942625343799591, |
|
"eval_runtime": 65.4024, |
|
"eval_samples_per_second": 58.775, |
|
"eval_steps_per_second": 0.933, |
|
"learning_rate": 0.0001, |
|
"step": 9774 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_explained_variance": 0.40524112719755906, |
|
"eval_loss": 0.35645580291748047, |
|
"eval_mae": 0.1291799694299698, |
|
"eval_mse": 0.03761202096939087, |
|
"eval_r2": 0.40258003846192925, |
|
"eval_rmse": 0.19393819570541382, |
|
"eval_runtime": 65.1148, |
|
"eval_samples_per_second": 59.034, |
|
"eval_steps_per_second": 0.937, |
|
"learning_rate": 0.0001, |
|
"step": 9955 |
|
}, |
|
{ |
|
"epoch": 55.248618784530386, |
|
"grad_norm": 0.14432880282402039, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3684, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_explained_variance": 0.40458508179737973, |
|
"eval_loss": 0.35665351152420044, |
|
"eval_mae": 0.12790292501449585, |
|
"eval_mse": 0.03767779469490051, |
|
"eval_r2": 0.40173746899832624, |
|
"eval_rmse": 0.19410768151283264, |
|
"eval_runtime": 64.7859, |
|
"eval_samples_per_second": 59.334, |
|
"eval_steps_per_second": 0.942, |
|
"learning_rate": 0.0001, |
|
"step": 10136 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_explained_variance": 0.40489131670731765, |
|
"eval_loss": 0.35622259974479675, |
|
"eval_mae": 0.12940338253974915, |
|
"eval_mse": 0.03757502883672714, |
|
"eval_r2": 0.40317412718530543, |
|
"eval_rmse": 0.1938427984714508, |
|
"eval_runtime": 64.354, |
|
"eval_samples_per_second": 59.732, |
|
"eval_steps_per_second": 0.948, |
|
"learning_rate": 0.0001, |
|
"step": 10317 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_explained_variance": 0.40618401765823364, |
|
"eval_loss": 0.35649776458740234, |
|
"eval_mae": 0.12992320954799652, |
|
"eval_mse": 0.03755363076925278, |
|
"eval_r2": 0.40359610267984325, |
|
"eval_rmse": 0.1937875896692276, |
|
"eval_runtime": 63.5875, |
|
"eval_samples_per_second": 60.452, |
|
"eval_steps_per_second": 0.959, |
|
"learning_rate": 0.0001, |
|
"step": 10498 |
|
}, |
|
{ |
|
"epoch": 58.011049723756905, |
|
"grad_norm": 0.17977654933929443, |
|
"learning_rate": 1e-05, |
|
"loss": 0.368, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_explained_variance": 0.40612818186099714, |
|
"eval_loss": 0.3559414744377136, |
|
"eval_mae": 0.1292232871055603, |
|
"eval_mse": 0.037484604865312576, |
|
"eval_r2": 0.404684355302516, |
|
"eval_rmse": 0.19360941648483276, |
|
"eval_runtime": 63.0292, |
|
"eval_samples_per_second": 60.988, |
|
"eval_steps_per_second": 0.968, |
|
"learning_rate": 1e-05, |
|
"step": 10679 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_explained_variance": 0.4082453021636376, |
|
"eval_loss": 0.35587525367736816, |
|
"eval_mae": 0.1295480728149414, |
|
"eval_mse": 0.03739844262599945, |
|
"eval_r2": 0.40598491760734956, |
|
"eval_rmse": 0.1933867633342743, |
|
"eval_runtime": 67.1089, |
|
"eval_samples_per_second": 57.28, |
|
"eval_steps_per_second": 0.909, |
|
"learning_rate": 1e-05, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 60.773480662983424, |
|
"grad_norm": 0.1965423822402954, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3664, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_explained_variance": 0.4074813173367427, |
|
"eval_loss": 0.35549554228782654, |
|
"eval_mae": 0.13036619126796722, |
|
"eval_mse": 0.03731352090835571, |
|
"eval_r2": 0.40719759569271147, |
|
"eval_rmse": 0.1931670755147934, |
|
"eval_runtime": 62.4919, |
|
"eval_samples_per_second": 61.512, |
|
"eval_steps_per_second": 0.976, |
|
"learning_rate": 1e-05, |
|
"step": 11041 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_explained_variance": 0.4057550017650311, |
|
"eval_loss": 0.3564907908439636, |
|
"eval_mae": 0.13166674971580505, |
|
"eval_mse": 0.03761378303170204, |
|
"eval_r2": 0.4036480162510964, |
|
"eval_rmse": 0.19394272565841675, |
|
"eval_runtime": 64.0633, |
|
"eval_samples_per_second": 60.003, |
|
"eval_steps_per_second": 0.952, |
|
"learning_rate": 1e-05, |
|
"step": 11222 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_explained_variance": 0.4086620624248798, |
|
"eval_loss": 0.35556313395500183, |
|
"eval_mae": 0.12934741377830505, |
|
"eval_mse": 0.03726600110530853, |
|
"eval_r2": 0.40751167332410276, |
|
"eval_rmse": 0.1930440366268158, |
|
"eval_runtime": 63.2366, |
|
"eval_samples_per_second": 60.788, |
|
"eval_steps_per_second": 0.965, |
|
"learning_rate": 1e-05, |
|
"step": 11403 |
|
}, |
|
{ |
|
"epoch": 63.53591160220994, |
|
"grad_norm": 0.1525866687297821, |
|
"learning_rate": 1e-05, |
|
"loss": 0.366, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_explained_variance": 0.40886356280400205, |
|
"eval_loss": 0.35541364550590515, |
|
"eval_mae": 0.1295996755361557, |
|
"eval_mse": 0.03727412968873978, |
|
"eval_r2": 0.40770017250386054, |
|
"eval_rmse": 0.1930650919675827, |
|
"eval_runtime": 63.8539, |
|
"eval_samples_per_second": 60.2, |
|
"eval_steps_per_second": 0.955, |
|
"learning_rate": 1e-05, |
|
"step": 11584 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_explained_variance": 0.40589494430101836, |
|
"eval_loss": 0.35602322220802307, |
|
"eval_mae": 0.13072702288627625, |
|
"eval_mse": 0.03753972053527832, |
|
"eval_r2": 0.4048648390836954, |
|
"eval_rmse": 0.19375169277191162, |
|
"eval_runtime": 63.5254, |
|
"eval_samples_per_second": 60.511, |
|
"eval_steps_per_second": 0.96, |
|
"learning_rate": 1e-05, |
|
"step": 11765 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_explained_variance": 0.4085214688227727, |
|
"eval_loss": 0.35534363985061646, |
|
"eval_mae": 0.13003438711166382, |
|
"eval_mse": 0.03723596781492233, |
|
"eval_r2": 0.40801214840672984, |
|
"eval_rmse": 0.19296623766422272, |
|
"eval_runtime": 66.0061, |
|
"eval_samples_per_second": 58.237, |
|
"eval_steps_per_second": 0.924, |
|
"learning_rate": 1e-05, |
|
"step": 11946 |
|
}, |
|
{ |
|
"epoch": 66.29834254143647, |
|
"grad_norm": 0.18801870942115784, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3654, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_explained_variance": 0.4081741479726938, |
|
"eval_loss": 0.3554227948188782, |
|
"eval_mae": 0.12988974153995514, |
|
"eval_mse": 0.03726029023528099, |
|
"eval_r2": 0.4077790726698564, |
|
"eval_rmse": 0.1930292397737503, |
|
"eval_runtime": 65.2859, |
|
"eval_samples_per_second": 58.879, |
|
"eval_steps_per_second": 0.934, |
|
"learning_rate": 1e-05, |
|
"step": 12127 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_explained_variance": 0.4073961698091947, |
|
"eval_loss": 0.35557952523231506, |
|
"eval_mae": 0.13015064597129822, |
|
"eval_mse": 0.03740492835640907, |
|
"eval_r2": 0.4058588832439236, |
|
"eval_rmse": 0.19340354204177856, |
|
"eval_runtime": 65.1267, |
|
"eval_samples_per_second": 59.023, |
|
"eval_steps_per_second": 0.937, |
|
"learning_rate": 1e-05, |
|
"step": 12308 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_explained_variance": 0.4085943423784696, |
|
"eval_loss": 0.3553701937198639, |
|
"eval_mae": 0.12976409494876862, |
|
"eval_mse": 0.03725024312734604, |
|
"eval_r2": 0.40825238050595736, |
|
"eval_rmse": 0.19300322234630585, |
|
"eval_runtime": 64.7301, |
|
"eval_samples_per_second": 59.385, |
|
"eval_steps_per_second": 0.942, |
|
"learning_rate": 1e-05, |
|
"step": 12489 |
|
}, |
|
{ |
|
"epoch": 69.06077348066299, |
|
"grad_norm": 0.15430860221385956, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3658, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_explained_variance": 0.4094207286834717, |
|
"eval_loss": 0.35594871640205383, |
|
"eval_mae": 0.13069316744804382, |
|
"eval_mse": 0.03737233206629753, |
|
"eval_r2": 0.40659481251933094, |
|
"eval_rmse": 0.19331924617290497, |
|
"eval_runtime": 66.4386, |
|
"eval_samples_per_second": 57.858, |
|
"eval_steps_per_second": 0.918, |
|
"learning_rate": 1e-05, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_explained_variance": 0.40725430158468395, |
|
"eval_loss": 0.35573798418045044, |
|
"eval_mae": 0.1295761913061142, |
|
"eval_mse": 0.037380401045084, |
|
"eval_r2": 0.40697699949296745, |
|
"eval_rmse": 0.19334012269973755, |
|
"eval_runtime": 65.624, |
|
"eval_samples_per_second": 58.576, |
|
"eval_steps_per_second": 0.93, |
|
"learning_rate": 1e-05, |
|
"step": 12851 |
|
}, |
|
{ |
|
"epoch": 71.8232044198895, |
|
"grad_norm": 0.35482099652290344, |
|
"learning_rate": 1e-05, |
|
"loss": 0.366, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_explained_variance": 0.40842239214823794, |
|
"eval_loss": 0.35571375489234924, |
|
"eval_mae": 0.13028408586978912, |
|
"eval_mse": 0.03734128177165985, |
|
"eval_r2": 0.40698361470433536, |
|
"eval_rmse": 0.19323892891407013, |
|
"eval_runtime": 64.0529, |
|
"eval_samples_per_second": 60.013, |
|
"eval_steps_per_second": 0.952, |
|
"learning_rate": 1e-05, |
|
"step": 13032 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_explained_variance": 0.4089708603345431, |
|
"eval_loss": 0.3552262485027313, |
|
"eval_mae": 0.12985268235206604, |
|
"eval_mse": 0.037223465740680695, |
|
"eval_r2": 0.408222457948687, |
|
"eval_rmse": 0.1929338425397873, |
|
"eval_runtime": 65.5971, |
|
"eval_samples_per_second": 58.6, |
|
"eval_steps_per_second": 0.93, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 13213 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_explained_variance": 0.40937405824661255, |
|
"eval_loss": 0.35516515374183655, |
|
"eval_mae": 0.1281428188085556, |
|
"eval_mse": 0.03721009939908981, |
|
"eval_r2": 0.4087432799234766, |
|
"eval_rmse": 0.1928991973400116, |
|
"eval_runtime": 63.5094, |
|
"eval_samples_per_second": 60.526, |
|
"eval_steps_per_second": 0.96, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 13394 |
|
}, |
|
{ |
|
"epoch": 74.58563535911603, |
|
"grad_norm": 0.20831693708896637, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.3654, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_explained_variance": 0.40568819871315587, |
|
"eval_loss": 0.3558255434036255, |
|
"eval_mae": 0.13025221228599548, |
|
"eval_mse": 0.037474822252988815, |
|
"eval_r2": 0.40474793306670837, |
|
"eval_rmse": 0.193584144115448, |
|
"eval_runtime": 63.853, |
|
"eval_samples_per_second": 60.201, |
|
"eval_steps_per_second": 0.955, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 13575 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_explained_variance": 0.408390985085414, |
|
"eval_loss": 0.3555220663547516, |
|
"eval_mae": 0.12769028544425964, |
|
"eval_mse": 0.03735670447349548, |
|
"eval_r2": 0.40610327648301114, |
|
"eval_rmse": 0.19327881932258606, |
|
"eval_runtime": 66.3493, |
|
"eval_samples_per_second": 57.936, |
|
"eval_steps_per_second": 0.919, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 13756 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_explained_variance": 0.4046147374006418, |
|
"eval_loss": 0.35615718364715576, |
|
"eval_mae": 0.13205072283744812, |
|
"eval_mse": 0.037551261484622955, |
|
"eval_r2": 0.4042150129069256, |
|
"eval_rmse": 0.19378148019313812, |
|
"eval_runtime": 65.1729, |
|
"eval_samples_per_second": 58.982, |
|
"eval_steps_per_second": 0.936, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 13937 |
|
}, |
|
{ |
|
"epoch": 77.34806629834254, |
|
"grad_norm": 0.20255261659622192, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.3663, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_explained_variance": 0.4090478007610028, |
|
"eval_loss": 0.35527750849723816, |
|
"eval_mae": 0.13062655925750732, |
|
"eval_mse": 0.037214502692222595, |
|
"eval_r2": 0.4086604768416133, |
|
"eval_rmse": 0.19291061162948608, |
|
"eval_runtime": 66.5281, |
|
"eval_samples_per_second": 57.78, |
|
"eval_steps_per_second": 0.917, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 14118 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_explained_variance": 0.4019758334526649, |
|
"eval_loss": 0.3569395840167999, |
|
"eval_mae": 0.13103225827217102, |
|
"eval_mse": 0.037889137864112854, |
|
"eval_r2": 0.3999096598660514, |
|
"eval_rmse": 0.19465132057666779, |
|
"eval_runtime": 65.9236, |
|
"eval_samples_per_second": 58.31, |
|
"eval_steps_per_second": 0.925, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 14299 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_explained_variance": 0.4057845427439763, |
|
"eval_loss": 0.35627198219299316, |
|
"eval_mae": 0.13107524812221527, |
|
"eval_mse": 0.037464920431375504, |
|
"eval_r2": 0.40523034358958093, |
|
"eval_rmse": 0.19355857372283936, |
|
"eval_runtime": 66.5566, |
|
"eval_samples_per_second": 57.755, |
|
"eval_steps_per_second": 0.917, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 14480 |
|
}, |
|
{ |
|
"epoch": 80.11049723756906, |
|
"grad_norm": 0.18743179738521576, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.3655, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_explained_variance": 0.4091951067631061, |
|
"eval_loss": 0.3555302619934082, |
|
"eval_mae": 0.13077440857887268, |
|
"eval_mse": 0.037267763167619705, |
|
"eval_r2": 0.4078657020062894, |
|
"eval_rmse": 0.1930485963821411, |
|
"eval_runtime": 67.6736, |
|
"eval_samples_per_second": 56.802, |
|
"eval_steps_per_second": 0.901, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 14661 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_explained_variance": 0.408656867650839, |
|
"eval_loss": 0.35563620924949646, |
|
"eval_mae": 0.13087815046310425, |
|
"eval_mse": 0.03731405362486839, |
|
"eval_r2": 0.4071799006076709, |
|
"eval_rmse": 0.19316846132278442, |
|
"eval_runtime": 68.4549, |
|
"eval_samples_per_second": 56.154, |
|
"eval_steps_per_second": 0.891, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 14842 |
|
}, |
|
{ |
|
"epoch": 82.87292817679558, |
|
"grad_norm": 0.20405510067939758, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.3651, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_explained_variance": 0.41021374555734486, |
|
"eval_loss": 0.35571029782295227, |
|
"eval_mae": 0.13036301732063293, |
|
"eval_mse": 0.03731907904148102, |
|
"eval_r2": 0.4073602568430592, |
|
"eval_rmse": 0.19318147003650665, |
|
"eval_runtime": 68.1325, |
|
"eval_samples_per_second": 56.419, |
|
"eval_steps_per_second": 0.895, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 15023 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_explained_variance": 0.4082063390658452, |
|
"eval_loss": 0.35581377148628235, |
|
"eval_mae": 0.1305844783782959, |
|
"eval_mse": 0.037393905222415924, |
|
"eval_r2": 0.4062799764902456, |
|
"eval_rmse": 0.19337503612041473, |
|
"eval_runtime": 66.3433, |
|
"eval_samples_per_second": 57.941, |
|
"eval_steps_per_second": 0.919, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 15204 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 15204, |
|
"total_flos": 2.180798470217171e+19, |
|
"train_loss": 0.37467605181350044, |
|
"train_runtime": 24668.9414, |
|
"train_samples_per_second": 46.707, |
|
"train_steps_per_second": 0.734 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 18100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.180798470217171e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|