{ "best_metric": 0.35516515374183655, "best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-large-2024_09_17-batch-size64_epochs100_freeze/checkpoint-13394", "epoch": 84.0, "eval_steps": 500, "global_step": 15204, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_explained_variance": 0.28046968350043666, "eval_loss": 0.38582414388656616, "eval_mae": 0.15708860754966736, "eval_mse": 0.04635250195860863, "eval_r2": 0.26238919671070565, "eval_rmse": 0.21529631316661835, "eval_runtime": 68.5532, "eval_samples_per_second": 56.073, "eval_steps_per_second": 0.89, "learning_rate": 0.001, "step": 181 }, { "epoch": 2.0, "eval_explained_variance": 0.32091750548436093, "eval_loss": 0.37635815143585205, "eval_mae": 0.1467229723930359, "eval_mse": 0.04398971050977707, "eval_r2": 0.3120521114085856, "eval_rmse": 0.20973724126815796, "eval_runtime": 65.3616, "eval_samples_per_second": 58.811, "eval_steps_per_second": 0.933, "learning_rate": 0.001, "step": 362 }, { "epoch": 2.7624309392265194, "grad_norm": 0.29469817876815796, "learning_rate": 0.001, "loss": 0.4473, "step": 500 }, { "epoch": 3.0, "eval_explained_variance": 0.3393913645010728, "eval_loss": 0.3715941309928894, "eval_mae": 0.1450481116771698, "eval_mse": 0.04250793904066086, "eval_r2": 0.33185449725151883, "eval_rmse": 0.20617453753948212, "eval_runtime": 65.4448, "eval_samples_per_second": 58.737, "eval_steps_per_second": 0.932, "learning_rate": 0.001, "step": 543 }, { "epoch": 4.0, "eval_explained_variance": 0.35663692767803484, "eval_loss": 0.3672849237918854, "eval_mae": 0.1395464986562729, "eval_mse": 0.0409623458981514, "eval_r2": 0.35477505001012255, "eval_rmse": 0.20239156484603882, "eval_runtime": 65.8223, "eval_samples_per_second": 58.4, "eval_steps_per_second": 0.927, "learning_rate": 0.001, "step": 724 }, { "epoch": 5.0, "eval_explained_variance": 0.3493932577279898, "eval_loss": 0.3692065477371216, "eval_mae": 0.1393202394247055, "eval_mse": 0.041857048869132996, "eval_r2": 0.3425061497286567, "eval_rmse": 0.20458994805812836, "eval_runtime": 66.3389, "eval_samples_per_second": 57.945, "eval_steps_per_second": 0.92, "learning_rate": 0.001, "step": 905 }, { "epoch": 5.524861878453039, "grad_norm": 0.19042304158210754, "learning_rate": 0.001, "loss": 0.3892, "step": 1000 }, { "epoch": 6.0, "eval_explained_variance": 0.35904277287996733, "eval_loss": 0.3672534227371216, "eval_mae": 0.14119164645671844, "eval_mse": 0.040877003222703934, "eval_r2": 0.3553590945142445, "eval_rmse": 0.2021806240081787, "eval_runtime": 65.5836, "eval_samples_per_second": 58.612, "eval_steps_per_second": 0.93, "learning_rate": 0.001, "step": 1086 }, { "epoch": 7.0, "eval_explained_variance": 0.34988729311869693, "eval_loss": 0.3680865168571472, "eval_mae": 0.14079739153385162, "eval_mse": 0.04153257608413696, "eval_r2": 0.3456613343062778, "eval_rmse": 0.2037954330444336, "eval_runtime": 64.4017, "eval_samples_per_second": 59.688, "eval_steps_per_second": 0.947, "learning_rate": 0.001, "step": 1267 }, { "epoch": 8.0, "eval_explained_variance": 0.36423414945602417, "eval_loss": 0.365603506565094, "eval_mae": 0.13892073929309845, "eval_mse": 0.04058730974793434, "eval_r2": 0.35962535995096967, "eval_rmse": 0.20146292448043823, "eval_runtime": 64.831, "eval_samples_per_second": 59.293, "eval_steps_per_second": 0.941, "learning_rate": 0.001, "step": 1448 }, { "epoch": 8.287292817679559, "grad_norm": 0.1760077178478241, "learning_rate": 0.001, "loss": 0.3855, "step": 1500 }, { "epoch": 9.0, "eval_explained_variance": 0.3612723258825449, "eval_loss": 0.36585840582847595, "eval_mae": 0.13438531756401062, "eval_mse": 0.04076695442199707, "eval_r2": 0.3554776353070419, "eval_rmse": 0.20190827548503876, "eval_runtime": 64.4706, "eval_samples_per_second": 59.624, "eval_steps_per_second": 0.946, "learning_rate": 0.001, "step": 1629 }, { "epoch": 10.0, "eval_explained_variance": 0.3561701728747441, "eval_loss": 0.366574227809906, "eval_mae": 0.13837845623493195, "eval_mse": 0.04093795642256737, "eval_r2": 0.3533183127533612, "eval_rmse": 0.2023313045501709, "eval_runtime": 63.2978, "eval_samples_per_second": 60.729, "eval_steps_per_second": 0.964, "learning_rate": 0.001, "step": 1810 }, { "epoch": 11.0, "eval_explained_variance": 0.3574172487625709, "eval_loss": 0.36660775542259216, "eval_mae": 0.13663478195667267, "eval_mse": 0.04090488329529762, "eval_r2": 0.35496352056496683, "eval_rmse": 0.20224955677986145, "eval_runtime": 66.2827, "eval_samples_per_second": 57.994, "eval_steps_per_second": 0.92, "learning_rate": 0.001, "step": 1991 }, { "epoch": 11.049723756906078, "grad_norm": 0.14891982078552246, "learning_rate": 0.001, "loss": 0.3816, "step": 2000 }, { "epoch": 12.0, "eval_explained_variance": 0.3598099580177894, "eval_loss": 0.36626219749450684, "eval_mae": 0.13958622515201569, "eval_mse": 0.04085636883974075, "eval_r2": 0.35871773520484396, "eval_rmse": 0.20212958753108978, "eval_runtime": 64.7339, "eval_samples_per_second": 59.382, "eval_steps_per_second": 0.942, "learning_rate": 0.001, "step": 2172 }, { "epoch": 13.0, "eval_explained_variance": 0.37047534722548264, "eval_loss": 0.3631901741027832, "eval_mae": 0.1360856592655182, "eval_mse": 0.03979066386818886, "eval_r2": 0.3696611807758026, "eval_rmse": 0.1994759738445282, "eval_runtime": 65.3689, "eval_samples_per_second": 58.805, "eval_steps_per_second": 0.933, "learning_rate": 0.001, "step": 2353 }, { "epoch": 13.812154696132596, "grad_norm": 0.14235170185565948, "learning_rate": 0.001, "loss": 0.381, "step": 2500 }, { "epoch": 14.0, "eval_explained_variance": 0.36284926304450404, "eval_loss": 0.36694806814193726, "eval_mae": 0.14229656755924225, "eval_mse": 0.04098258540034294, "eval_r2": 0.356153731540797, "eval_rmse": 0.20244155824184418, "eval_runtime": 64.126, "eval_samples_per_second": 59.945, "eval_steps_per_second": 0.951, "learning_rate": 0.001, "step": 2534 }, { "epoch": 15.0, "eval_explained_variance": 0.36449302159822905, "eval_loss": 0.3644973933696747, "eval_mae": 0.1395292580127716, "eval_mse": 0.04036581516265869, "eval_r2": 0.36203359510531696, "eval_rmse": 0.2009124606847763, "eval_runtime": 64.0305, "eval_samples_per_second": 60.034, "eval_steps_per_second": 0.953, "learning_rate": 0.001, "step": 2715 }, { "epoch": 16.0, "eval_explained_variance": 0.37152041838719296, "eval_loss": 0.36393943428993225, "eval_mae": 0.13569381833076477, "eval_mse": 0.039987124502658844, "eval_r2": 0.36948082804864185, "eval_rmse": 0.19996780157089233, "eval_runtime": 63.9139, "eval_samples_per_second": 60.143, "eval_steps_per_second": 0.954, "learning_rate": 0.001, "step": 2896 }, { "epoch": 16.574585635359117, "grad_norm": 0.13048891723155975, "learning_rate": 0.001, "loss": 0.3811, "step": 3000 }, { "epoch": 17.0, "eval_explained_variance": 0.37284482900912946, "eval_loss": 0.36665406823158264, "eval_mae": 0.14128881692886353, "eval_mse": 0.04064851254224777, "eval_r2": 0.3621847777710853, "eval_rmse": 0.20161476731300354, "eval_runtime": 66.0408, "eval_samples_per_second": 58.206, "eval_steps_per_second": 0.924, "learning_rate": 0.001, "step": 3077 }, { "epoch": 18.0, "eval_explained_variance": 0.3705295782822829, "eval_loss": 0.36318618059158325, "eval_mae": 0.13683417439460754, "eval_mse": 0.03981361910700798, "eval_r2": 0.369508628045091, "eval_rmse": 0.19953350722789764, "eval_runtime": 63.7575, "eval_samples_per_second": 60.291, "eval_steps_per_second": 0.957, "learning_rate": 0.001, "step": 3258 }, { "epoch": 19.0, "eval_explained_variance": 0.3733598177249615, "eval_loss": 0.36302879452705383, "eval_mae": 0.13539017736911774, "eval_mse": 0.03974781930446625, "eval_r2": 0.3718927441003872, "eval_rmse": 0.19936855137348175, "eval_runtime": 63.4414, "eval_samples_per_second": 60.591, "eval_steps_per_second": 0.962, "learning_rate": 0.001, "step": 3439 }, { "epoch": 19.337016574585636, "grad_norm": 0.13633792102336884, "learning_rate": 0.001, "loss": 0.3792, "step": 3500 }, { "epoch": 20.0, "eval_explained_variance": 0.3622324833503136, "eval_loss": 0.36489424109458923, "eval_mae": 0.13486731052398682, "eval_mse": 0.04052112251520157, "eval_r2": 0.35869592759647334, "eval_rmse": 0.20129859447479248, "eval_runtime": 64.295, "eval_samples_per_second": 59.787, "eval_steps_per_second": 0.949, "learning_rate": 0.001, "step": 3620 }, { "epoch": 21.0, "eval_explained_variance": 0.3630923858055702, "eval_loss": 0.3665030300617218, "eval_mae": 0.13610774278640747, "eval_mse": 0.040700096637010574, "eval_r2": 0.3584834523421166, "eval_rmse": 0.20174264907836914, "eval_runtime": 64.1739, "eval_samples_per_second": 59.9, "eval_steps_per_second": 0.951, "learning_rate": 0.001, "step": 3801 }, { "epoch": 22.0, "eval_explained_variance": 0.3704591485170218, "eval_loss": 0.3647814095020294, "eval_mae": 0.1368531733751297, "eval_mse": 0.03999844938516617, "eval_r2": 0.3677615209740873, "eval_rmse": 0.19999612867832184, "eval_runtime": 63.5961, "eval_samples_per_second": 60.444, "eval_steps_per_second": 0.959, "learning_rate": 0.001, "step": 3982 }, { "epoch": 22.099447513812155, "grad_norm": 0.1797100454568863, "learning_rate": 0.001, "loss": 0.3808, "step": 4000 }, { "epoch": 23.0, "eval_explained_variance": 0.3736427976534917, "eval_loss": 0.3633384704589844, "eval_mae": 0.1356455683708191, "eval_mse": 0.039849139750003815, "eval_r2": 0.37049292256309013, "eval_rmse": 0.1996224969625473, "eval_runtime": 63.5905, "eval_samples_per_second": 60.449, "eval_steps_per_second": 0.959, "learning_rate": 0.001, "step": 4163 }, { "epoch": 24.0, "eval_explained_variance": 0.3761314291220445, "eval_loss": 0.3632254898548126, "eval_mae": 0.13934393227100372, "eval_mse": 0.03965350612998009, "eval_r2": 0.3725149173190659, "eval_rmse": 0.19913187623023987, "eval_runtime": 63.5074, "eval_samples_per_second": 60.528, "eval_steps_per_second": 0.961, "learning_rate": 0.001, "step": 4344 }, { "epoch": 24.861878453038674, "grad_norm": 0.10225138068199158, "learning_rate": 0.001, "loss": 0.3796, "step": 4500 }, { "epoch": 25.0, "eval_explained_variance": 0.37342833555661714, "eval_loss": 0.3638208210468292, "eval_mae": 0.13812901079654694, "eval_mse": 0.03988226130604744, "eval_r2": 0.3698107462777432, "eval_rmse": 0.19970543682575226, "eval_runtime": 64.082, "eval_samples_per_second": 59.986, "eval_steps_per_second": 0.952, "learning_rate": 0.001, "step": 4525 }, { "epoch": 26.0, "eval_explained_variance": 0.38356072627581084, "eval_loss": 0.3607248365879059, "eval_mae": 0.132920041680336, "eval_mse": 0.03901772201061249, "eval_r2": 0.3818014601715421, "eval_rmse": 0.19752904772758484, "eval_runtime": 63.8273, "eval_samples_per_second": 60.225, "eval_steps_per_second": 0.956, "learning_rate": 0.0001, "step": 4706 }, { "epoch": 27.0, "eval_explained_variance": 0.3877932016666119, "eval_loss": 0.3599555194377899, "eval_mae": 0.13530299067497253, "eval_mse": 0.038680098950862885, "eval_r2": 0.3862897971569748, "eval_rmse": 0.19667257368564606, "eval_runtime": 63.6171, "eval_samples_per_second": 60.424, "eval_steps_per_second": 0.959, "learning_rate": 0.0001, "step": 4887 }, { "epoch": 27.624309392265193, "grad_norm": 0.09920254349708557, "learning_rate": 0.0001, "loss": 0.3765, "step": 5000 }, { "epoch": 28.0, "eval_explained_variance": 0.39040088195067185, "eval_loss": 0.35923057794570923, "eval_mae": 0.13371111452579498, "eval_mse": 0.038444750010967255, "eval_r2": 0.3893828319749203, "eval_rmse": 0.19607332348823547, "eval_runtime": 63.6463, "eval_samples_per_second": 60.396, "eval_steps_per_second": 0.958, "learning_rate": 0.0001, "step": 5068 }, { "epoch": 29.0, "eval_explained_variance": 0.39147963432165295, "eval_loss": 0.3595493733882904, "eval_mae": 0.13497120141983032, "eval_mse": 0.03846590965986252, "eval_r2": 0.3891551349793923, "eval_rmse": 0.1961272805929184, "eval_runtime": 63.7787, "eval_samples_per_second": 60.271, "eval_steps_per_second": 0.956, "learning_rate": 0.0001, "step": 5249 }, { "epoch": 30.0, "eval_explained_variance": 0.3893452011621915, "eval_loss": 0.35978832840919495, "eval_mae": 0.13498304784297943, "eval_mse": 0.03862994909286499, "eval_r2": 0.3876274861623127, "eval_rmse": 0.19654503464698792, "eval_runtime": 63.918, "eval_samples_per_second": 60.14, "eval_steps_per_second": 0.954, "learning_rate": 0.0001, "step": 5430 }, { "epoch": 30.386740331491712, "grad_norm": 0.09680859744548798, "learning_rate": 0.0001, "loss": 0.373, "step": 5500 }, { "epoch": 31.0, "eval_explained_variance": 0.39206390655957735, "eval_loss": 0.35871124267578125, "eval_mae": 0.131711944937706, "eval_mse": 0.03838532418012619, "eval_r2": 0.39069464683386806, "eval_rmse": 0.19592173397541046, "eval_runtime": 63.6009, "eval_samples_per_second": 60.439, "eval_steps_per_second": 0.959, "learning_rate": 0.0001, "step": 5611 }, { "epoch": 32.0, "eval_explained_variance": 0.39324428943487316, "eval_loss": 0.35840144753456116, "eval_mae": 0.13263028860092163, "eval_mse": 0.0382704883813858, "eval_r2": 0.39277553504116497, "eval_rmse": 0.19562844932079315, "eval_runtime": 63.4174, "eval_samples_per_second": 60.614, "eval_steps_per_second": 0.962, "learning_rate": 0.0001, "step": 5792 }, { "epoch": 33.0, "eval_explained_variance": 0.3953018326025743, "eval_loss": 0.35809990763664246, "eval_mae": 0.13110357522964478, "eval_mse": 0.03812328726053238, "eval_r2": 0.39453575095056653, "eval_rmse": 0.19525185227394104, "eval_runtime": 62.9848, "eval_samples_per_second": 61.031, "eval_steps_per_second": 0.968, "learning_rate": 0.0001, "step": 5973 }, { "epoch": 33.149171270718234, "grad_norm": 0.10557221621274948, "learning_rate": 0.0001, "loss": 0.3735, "step": 6000 }, { "epoch": 34.0, "eval_explained_variance": 0.3966822119859549, "eval_loss": 0.3580343723297119, "eval_mae": 0.13232208788394928, "eval_mse": 0.038077060133218765, "eval_r2": 0.3953078482977419, "eval_rmse": 0.19513344764709473, "eval_runtime": 63.9448, "eval_samples_per_second": 60.114, "eval_steps_per_second": 0.954, "learning_rate": 0.0001, "step": 6154 }, { "epoch": 35.0, "eval_explained_variance": 0.39542460441589355, "eval_loss": 0.3578670918941498, "eval_mae": 0.13223391771316528, "eval_mse": 0.038055673241615295, "eval_r2": 0.3949423136793632, "eval_rmse": 0.19507862627506256, "eval_runtime": 62.8884, "eval_samples_per_second": 61.124, "eval_steps_per_second": 0.97, "learning_rate": 0.0001, "step": 6335 }, { "epoch": 35.91160220994475, "grad_norm": 0.11413700878620148, "learning_rate": 0.0001, "loss": 0.3711, "step": 6500 }, { "epoch": 36.0, "eval_explained_variance": 0.38986305548594546, "eval_loss": 0.35921958088874817, "eval_mae": 0.13451573252677917, "eval_mse": 0.0385238379240036, "eval_r2": 0.3895210802446932, "eval_rmse": 0.19627490639686584, "eval_runtime": 63.9244, "eval_samples_per_second": 60.134, "eval_steps_per_second": 0.954, "learning_rate": 0.0001, "step": 6516 }, { "epoch": 37.0, "eval_explained_variance": 0.39700071628277117, "eval_loss": 0.35754600167274475, "eval_mae": 0.13133254647254944, "eval_mse": 0.037971220910549164, "eval_r2": 0.3965857587136563, "eval_rmse": 0.19486205279827118, "eval_runtime": 63.3201, "eval_samples_per_second": 60.707, "eval_steps_per_second": 0.963, "learning_rate": 0.0001, "step": 6697 }, { "epoch": 38.0, "eval_explained_variance": 0.39355502220300526, "eval_loss": 0.35816583037376404, "eval_mae": 0.13258841633796692, "eval_mse": 0.038258858025074005, "eval_r2": 0.39226546341596713, "eval_rmse": 0.19559872150421143, "eval_runtime": 62.6934, "eval_samples_per_second": 61.314, "eval_steps_per_second": 0.973, "learning_rate": 0.0001, "step": 6878 }, { "epoch": 38.67403314917127, "grad_norm": 0.147694930434227, "learning_rate": 0.0001, "loss": 0.3705, "step": 7000 }, { "epoch": 39.0, "eval_explained_variance": 0.3965281844139099, "eval_loss": 0.3575587570667267, "eval_mae": 0.1313440054655075, "eval_mse": 0.03796360641717911, "eval_r2": 0.39630358388937376, "eval_rmse": 0.19484251737594604, "eval_runtime": 62.5891, "eval_samples_per_second": 61.416, "eval_steps_per_second": 0.975, "learning_rate": 0.0001, "step": 7059 }, { "epoch": 40.0, "eval_explained_variance": 0.399988224873176, "eval_loss": 0.3574675917625427, "eval_mae": 0.13325949013233185, "eval_mse": 0.03790339455008507, "eval_r2": 0.3980004685467563, "eval_rmse": 0.19468794763088226, "eval_runtime": 63.1438, "eval_samples_per_second": 60.877, "eval_steps_per_second": 0.966, "learning_rate": 0.0001, "step": 7240 }, { "epoch": 41.0, "eval_explained_variance": 0.39883482914704543, "eval_loss": 0.35797080397605896, "eval_mae": 0.13172872364521027, "eval_mse": 0.03810995817184448, "eval_r2": 0.3955525420135218, "eval_rmse": 0.19521771371364594, "eval_runtime": 63.8253, "eval_samples_per_second": 60.227, "eval_steps_per_second": 0.956, "learning_rate": 0.0001, "step": 7421 }, { "epoch": 41.43646408839779, "grad_norm": 0.13456250727176666, "learning_rate": 0.0001, "loss": 0.3704, "step": 7500 }, { "epoch": 42.0, "eval_explained_variance": 0.39858559003243077, "eval_loss": 0.3574862778186798, "eval_mae": 0.13303333520889282, "eval_mse": 0.03798728436231613, "eval_r2": 0.39695276377811434, "eval_rmse": 0.19490326941013336, "eval_runtime": 67.394, "eval_samples_per_second": 57.038, "eval_steps_per_second": 0.905, "learning_rate": 0.0001, "step": 7602 }, { "epoch": 43.0, "eval_explained_variance": 0.40196093229147106, "eval_loss": 0.3568632900714874, "eval_mae": 0.13252291083335876, "eval_mse": 0.03772151470184326, "eval_r2": 0.4008098201061217, "eval_rmse": 0.19422027468681335, "eval_runtime": 64.6291, "eval_samples_per_second": 59.478, "eval_steps_per_second": 0.944, "learning_rate": 0.0001, "step": 7783 }, { "epoch": 44.0, "eval_explained_variance": 0.4026290269998404, "eval_loss": 0.35680440068244934, "eval_mae": 0.13054220378398895, "eval_mse": 0.03770707920193672, "eval_r2": 0.4009435040202465, "eval_rmse": 0.1941831111907959, "eval_runtime": 64.3612, "eval_samples_per_second": 59.725, "eval_steps_per_second": 0.948, "learning_rate": 0.0001, "step": 7964 }, { "epoch": 44.19889502762431, "grad_norm": 0.12347038835287094, "learning_rate": 0.0001, "loss": 0.3695, "step": 8000 }, { "epoch": 45.0, "eval_explained_variance": 0.40327414182516247, "eval_loss": 0.35672253370285034, "eval_mae": 0.13190330564975739, "eval_mse": 0.03762032091617584, "eval_r2": 0.40209036636711937, "eval_rmse": 0.193959578871727, "eval_runtime": 63.5564, "eval_samples_per_second": 60.482, "eval_steps_per_second": 0.96, "learning_rate": 0.0001, "step": 8145 }, { "epoch": 46.0, "eval_explained_variance": 0.4014772314291734, "eval_loss": 0.35691043734550476, "eval_mae": 0.1298011690378189, "eval_mse": 0.03774061053991318, "eval_r2": 0.39979262898816803, "eval_rmse": 0.19426943361759186, "eval_runtime": 63.5835, "eval_samples_per_second": 60.456, "eval_steps_per_second": 0.959, "learning_rate": 0.0001, "step": 8326 }, { "epoch": 46.96132596685083, "grad_norm": 0.1476801335811615, "learning_rate": 0.0001, "loss": 0.369, "step": 8500 }, { "epoch": 47.0, "eval_explained_variance": 0.39959606299033534, "eval_loss": 0.3573501706123352, "eval_mae": 0.12922033667564392, "eval_mse": 0.03795965388417244, "eval_r2": 0.39734844502667516, "eval_rmse": 0.19483236968517303, "eval_runtime": 64.1983, "eval_samples_per_second": 59.877, "eval_steps_per_second": 0.95, "learning_rate": 0.0001, "step": 8507 }, { "epoch": 48.0, "eval_explained_variance": 0.404104429941911, "eval_loss": 0.35634738206863403, "eval_mae": 0.13015295565128326, "eval_mse": 0.03764864429831505, "eval_r2": 0.4019054071784941, "eval_rmse": 0.19403257966041565, "eval_runtime": 63.8043, "eval_samples_per_second": 60.247, "eval_steps_per_second": 0.956, "learning_rate": 0.0001, "step": 8688 }, { "epoch": 49.0, "eval_explained_variance": 0.4024105530518752, "eval_loss": 0.3566192090511322, "eval_mae": 0.1305515021085739, "eval_mse": 0.03765449672937393, "eval_r2": 0.40112185894390806, "eval_rmse": 0.19404765963554382, "eval_runtime": 65.5486, "eval_samples_per_second": 58.644, "eval_steps_per_second": 0.931, "learning_rate": 0.0001, "step": 8869 }, { "epoch": 49.72375690607735, "grad_norm": 0.17585940659046173, "learning_rate": 0.0001, "loss": 0.3691, "step": 9000 }, { "epoch": 50.0, "eval_explained_variance": 0.40147255475704485, "eval_loss": 0.3571104109287262, "eval_mae": 0.13218748569488525, "eval_mse": 0.0377979539334774, "eval_r2": 0.39978904676068683, "eval_rmse": 0.19441695511341095, "eval_runtime": 64.6683, "eval_samples_per_second": 59.442, "eval_steps_per_second": 0.943, "learning_rate": 0.0001, "step": 9050 }, { "epoch": 51.0, "eval_explained_variance": 0.4020539063673753, "eval_loss": 0.3584417402744293, "eval_mae": 0.13350461423397064, "eval_mse": 0.03811892494559288, "eval_r2": 0.39583579837070054, "eval_rmse": 0.19524069130420685, "eval_runtime": 64.7621, "eval_samples_per_second": 59.356, "eval_steps_per_second": 0.942, "learning_rate": 0.0001, "step": 9231 }, { "epoch": 52.0, "eval_explained_variance": 0.4045378336539635, "eval_loss": 0.3561328649520874, "eval_mae": 0.1308905929327011, "eval_mse": 0.03748491033911705, "eval_r2": 0.4042346756357482, "eval_rmse": 0.19361020624637604, "eval_runtime": 64.77, "eval_samples_per_second": 59.349, "eval_steps_per_second": 0.942, "learning_rate": 0.0001, "step": 9412 }, { "epoch": 52.48618784530387, "grad_norm": 0.15689648687839508, "learning_rate": 0.0001, "loss": 0.3677, "step": 9500 }, { "epoch": 53.0, "eval_explained_variance": 0.4053275997822101, "eval_loss": 0.35652926564216614, "eval_mae": 0.13147617876529694, "eval_mse": 0.03759394586086273, "eval_r2": 0.4026062075156075, "eval_rmse": 0.19389158487319946, "eval_runtime": 64.8021, "eval_samples_per_second": 59.319, "eval_steps_per_second": 0.941, "learning_rate": 0.0001, "step": 9593 }, { "epoch": 54.0, "eval_explained_variance": 0.401798074062054, "eval_loss": 0.3567388355731964, "eval_mae": 0.13164331018924713, "eval_mse": 0.03773793205618858, "eval_r2": 0.40105556644385676, "eval_rmse": 0.1942625343799591, "eval_runtime": 65.4024, "eval_samples_per_second": 58.775, "eval_steps_per_second": 0.933, "learning_rate": 0.0001, "step": 9774 }, { "epoch": 55.0, "eval_explained_variance": 0.40524112719755906, "eval_loss": 0.35645580291748047, "eval_mae": 0.1291799694299698, "eval_mse": 0.03761202096939087, "eval_r2": 0.40258003846192925, "eval_rmse": 0.19393819570541382, "eval_runtime": 65.1148, "eval_samples_per_second": 59.034, "eval_steps_per_second": 0.937, "learning_rate": 0.0001, "step": 9955 }, { "epoch": 55.248618784530386, "grad_norm": 0.14432880282402039, "learning_rate": 0.0001, "loss": 0.3684, "step": 10000 }, { "epoch": 56.0, "eval_explained_variance": 0.40458508179737973, "eval_loss": 0.35665351152420044, "eval_mae": 0.12790292501449585, "eval_mse": 0.03767779469490051, "eval_r2": 0.40173746899832624, "eval_rmse": 0.19410768151283264, "eval_runtime": 64.7859, "eval_samples_per_second": 59.334, "eval_steps_per_second": 0.942, "learning_rate": 0.0001, "step": 10136 }, { "epoch": 57.0, "eval_explained_variance": 0.40489131670731765, "eval_loss": 0.35622259974479675, "eval_mae": 0.12940338253974915, "eval_mse": 0.03757502883672714, "eval_r2": 0.40317412718530543, "eval_rmse": 0.1938427984714508, "eval_runtime": 64.354, "eval_samples_per_second": 59.732, "eval_steps_per_second": 0.948, "learning_rate": 0.0001, "step": 10317 }, { "epoch": 58.0, "eval_explained_variance": 0.40618401765823364, "eval_loss": 0.35649776458740234, "eval_mae": 0.12992320954799652, "eval_mse": 0.03755363076925278, "eval_r2": 0.40359610267984325, "eval_rmse": 0.1937875896692276, "eval_runtime": 63.5875, "eval_samples_per_second": 60.452, "eval_steps_per_second": 0.959, "learning_rate": 0.0001, "step": 10498 }, { "epoch": 58.011049723756905, "grad_norm": 0.17977654933929443, "learning_rate": 1e-05, "loss": 0.368, "step": 10500 }, { "epoch": 59.0, "eval_explained_variance": 0.40612818186099714, "eval_loss": 0.3559414744377136, "eval_mae": 0.1292232871055603, "eval_mse": 0.037484604865312576, "eval_r2": 0.404684355302516, "eval_rmse": 0.19360941648483276, "eval_runtime": 63.0292, "eval_samples_per_second": 60.988, "eval_steps_per_second": 0.968, "learning_rate": 1e-05, "step": 10679 }, { "epoch": 60.0, "eval_explained_variance": 0.4082453021636376, "eval_loss": 0.35587525367736816, "eval_mae": 0.1295480728149414, "eval_mse": 0.03739844262599945, "eval_r2": 0.40598491760734956, "eval_rmse": 0.1933867633342743, "eval_runtime": 67.1089, "eval_samples_per_second": 57.28, "eval_steps_per_second": 0.909, "learning_rate": 1e-05, "step": 10860 }, { "epoch": 60.773480662983424, "grad_norm": 0.1965423822402954, "learning_rate": 1e-05, "loss": 0.3664, "step": 11000 }, { "epoch": 61.0, "eval_explained_variance": 0.4074813173367427, "eval_loss": 0.35549554228782654, "eval_mae": 0.13036619126796722, "eval_mse": 0.03731352090835571, "eval_r2": 0.40719759569271147, "eval_rmse": 0.1931670755147934, "eval_runtime": 62.4919, "eval_samples_per_second": 61.512, "eval_steps_per_second": 0.976, "learning_rate": 1e-05, "step": 11041 }, { "epoch": 62.0, "eval_explained_variance": 0.4057550017650311, "eval_loss": 0.3564907908439636, "eval_mae": 0.13166674971580505, "eval_mse": 0.03761378303170204, "eval_r2": 0.4036480162510964, "eval_rmse": 0.19394272565841675, "eval_runtime": 64.0633, "eval_samples_per_second": 60.003, "eval_steps_per_second": 0.952, "learning_rate": 1e-05, "step": 11222 }, { "epoch": 63.0, "eval_explained_variance": 0.4086620624248798, "eval_loss": 0.35556313395500183, "eval_mae": 0.12934741377830505, "eval_mse": 0.03726600110530853, "eval_r2": 0.40751167332410276, "eval_rmse": 0.1930440366268158, "eval_runtime": 63.2366, "eval_samples_per_second": 60.788, "eval_steps_per_second": 0.965, "learning_rate": 1e-05, "step": 11403 }, { "epoch": 63.53591160220994, "grad_norm": 0.1525866687297821, "learning_rate": 1e-05, "loss": 0.366, "step": 11500 }, { "epoch": 64.0, "eval_explained_variance": 0.40886356280400205, "eval_loss": 0.35541364550590515, "eval_mae": 0.1295996755361557, "eval_mse": 0.03727412968873978, "eval_r2": 0.40770017250386054, "eval_rmse": 0.1930650919675827, "eval_runtime": 63.8539, "eval_samples_per_second": 60.2, "eval_steps_per_second": 0.955, "learning_rate": 1e-05, "step": 11584 }, { "epoch": 65.0, "eval_explained_variance": 0.40589494430101836, "eval_loss": 0.35602322220802307, "eval_mae": 0.13072702288627625, "eval_mse": 0.03753972053527832, "eval_r2": 0.4048648390836954, "eval_rmse": 0.19375169277191162, "eval_runtime": 63.5254, "eval_samples_per_second": 60.511, "eval_steps_per_second": 0.96, "learning_rate": 1e-05, "step": 11765 }, { "epoch": 66.0, "eval_explained_variance": 0.4085214688227727, "eval_loss": 0.35534363985061646, "eval_mae": 0.13003438711166382, "eval_mse": 0.03723596781492233, "eval_r2": 0.40801214840672984, "eval_rmse": 0.19296623766422272, "eval_runtime": 66.0061, "eval_samples_per_second": 58.237, "eval_steps_per_second": 0.924, "learning_rate": 1e-05, "step": 11946 }, { "epoch": 66.29834254143647, "grad_norm": 0.18801870942115784, "learning_rate": 1e-05, "loss": 0.3654, "step": 12000 }, { "epoch": 67.0, "eval_explained_variance": 0.4081741479726938, "eval_loss": 0.3554227948188782, "eval_mae": 0.12988974153995514, "eval_mse": 0.03726029023528099, "eval_r2": 0.4077790726698564, "eval_rmse": 0.1930292397737503, "eval_runtime": 65.2859, "eval_samples_per_second": 58.879, "eval_steps_per_second": 0.934, "learning_rate": 1e-05, "step": 12127 }, { "epoch": 68.0, "eval_explained_variance": 0.4073961698091947, "eval_loss": 0.35557952523231506, "eval_mae": 0.13015064597129822, "eval_mse": 0.03740492835640907, "eval_r2": 0.4058588832439236, "eval_rmse": 0.19340354204177856, "eval_runtime": 65.1267, "eval_samples_per_second": 59.023, "eval_steps_per_second": 0.937, "learning_rate": 1e-05, "step": 12308 }, { "epoch": 69.0, "eval_explained_variance": 0.4085943423784696, "eval_loss": 0.3553701937198639, "eval_mae": 0.12976409494876862, "eval_mse": 0.03725024312734604, "eval_r2": 0.40825238050595736, "eval_rmse": 0.19300322234630585, "eval_runtime": 64.7301, "eval_samples_per_second": 59.385, "eval_steps_per_second": 0.942, "learning_rate": 1e-05, "step": 12489 }, { "epoch": 69.06077348066299, "grad_norm": 0.15430860221385956, "learning_rate": 1e-05, "loss": 0.3658, "step": 12500 }, { "epoch": 70.0, "eval_explained_variance": 0.4094207286834717, "eval_loss": 0.35594871640205383, "eval_mae": 0.13069316744804382, "eval_mse": 0.03737233206629753, "eval_r2": 0.40659481251933094, "eval_rmse": 0.19331924617290497, "eval_runtime": 66.4386, "eval_samples_per_second": 57.858, "eval_steps_per_second": 0.918, "learning_rate": 1e-05, "step": 12670 }, { "epoch": 71.0, "eval_explained_variance": 0.40725430158468395, "eval_loss": 0.35573798418045044, "eval_mae": 0.1295761913061142, "eval_mse": 0.037380401045084, "eval_r2": 0.40697699949296745, "eval_rmse": 0.19334012269973755, "eval_runtime": 65.624, "eval_samples_per_second": 58.576, "eval_steps_per_second": 0.93, "learning_rate": 1e-05, "step": 12851 }, { "epoch": 71.8232044198895, "grad_norm": 0.35482099652290344, "learning_rate": 1e-05, "loss": 0.366, "step": 13000 }, { "epoch": 72.0, "eval_explained_variance": 0.40842239214823794, "eval_loss": 0.35571375489234924, "eval_mae": 0.13028408586978912, "eval_mse": 0.03734128177165985, "eval_r2": 0.40698361470433536, "eval_rmse": 0.19323892891407013, "eval_runtime": 64.0529, "eval_samples_per_second": 60.013, "eval_steps_per_second": 0.952, "learning_rate": 1e-05, "step": 13032 }, { "epoch": 73.0, "eval_explained_variance": 0.4089708603345431, "eval_loss": 0.3552262485027313, "eval_mae": 0.12985268235206604, "eval_mse": 0.037223465740680695, "eval_r2": 0.408222457948687, "eval_rmse": 0.1929338425397873, "eval_runtime": 65.5971, "eval_samples_per_second": 58.6, "eval_steps_per_second": 0.93, "learning_rate": 1.0000000000000002e-06, "step": 13213 }, { "epoch": 74.0, "eval_explained_variance": 0.40937405824661255, "eval_loss": 0.35516515374183655, "eval_mae": 0.1281428188085556, "eval_mse": 0.03721009939908981, "eval_r2": 0.4087432799234766, "eval_rmse": 0.1928991973400116, "eval_runtime": 63.5094, "eval_samples_per_second": 60.526, "eval_steps_per_second": 0.96, "learning_rate": 1.0000000000000002e-06, "step": 13394 }, { "epoch": 74.58563535911603, "grad_norm": 0.20831693708896637, "learning_rate": 1.0000000000000002e-06, "loss": 0.3654, "step": 13500 }, { "epoch": 75.0, "eval_explained_variance": 0.40568819871315587, "eval_loss": 0.3558255434036255, "eval_mae": 0.13025221228599548, "eval_mse": 0.037474822252988815, "eval_r2": 0.40474793306670837, "eval_rmse": 0.193584144115448, "eval_runtime": 63.853, "eval_samples_per_second": 60.201, "eval_steps_per_second": 0.955, "learning_rate": 1.0000000000000002e-06, "step": 13575 }, { "epoch": 76.0, "eval_explained_variance": 0.408390985085414, "eval_loss": 0.3555220663547516, "eval_mae": 0.12769028544425964, "eval_mse": 0.03735670447349548, "eval_r2": 0.40610327648301114, "eval_rmse": 0.19327881932258606, "eval_runtime": 66.3493, "eval_samples_per_second": 57.936, "eval_steps_per_second": 0.919, "learning_rate": 1.0000000000000002e-06, "step": 13756 }, { "epoch": 77.0, "eval_explained_variance": 0.4046147374006418, "eval_loss": 0.35615718364715576, "eval_mae": 0.13205072283744812, "eval_mse": 0.037551261484622955, "eval_r2": 0.4042150129069256, "eval_rmse": 0.19378148019313812, "eval_runtime": 65.1729, "eval_samples_per_second": 58.982, "eval_steps_per_second": 0.936, "learning_rate": 1.0000000000000002e-06, "step": 13937 }, { "epoch": 77.34806629834254, "grad_norm": 0.20255261659622192, "learning_rate": 1.0000000000000002e-06, "loss": 0.3663, "step": 14000 }, { "epoch": 78.0, "eval_explained_variance": 0.4090478007610028, "eval_loss": 0.35527750849723816, "eval_mae": 0.13062655925750732, "eval_mse": 0.037214502692222595, "eval_r2": 0.4086604768416133, "eval_rmse": 0.19291061162948608, "eval_runtime": 66.5281, "eval_samples_per_second": 57.78, "eval_steps_per_second": 0.917, "learning_rate": 1.0000000000000002e-06, "step": 14118 }, { "epoch": 79.0, "eval_explained_variance": 0.4019758334526649, "eval_loss": 0.3569395840167999, "eval_mae": 0.13103225827217102, "eval_mse": 0.037889137864112854, "eval_r2": 0.3999096598660514, "eval_rmse": 0.19465132057666779, "eval_runtime": 65.9236, "eval_samples_per_second": 58.31, "eval_steps_per_second": 0.925, "learning_rate": 1.0000000000000002e-06, "step": 14299 }, { "epoch": 80.0, "eval_explained_variance": 0.4057845427439763, "eval_loss": 0.35627198219299316, "eval_mae": 0.13107524812221527, "eval_mse": 0.037464920431375504, "eval_r2": 0.40523034358958093, "eval_rmse": 0.19355857372283936, "eval_runtime": 66.5566, "eval_samples_per_second": 57.755, "eval_steps_per_second": 0.917, "learning_rate": 1.0000000000000002e-06, "step": 14480 }, { "epoch": 80.11049723756906, "grad_norm": 0.18743179738521576, "learning_rate": 1.0000000000000002e-07, "loss": 0.3655, "step": 14500 }, { "epoch": 81.0, "eval_explained_variance": 0.4091951067631061, "eval_loss": 0.3555302619934082, "eval_mae": 0.13077440857887268, "eval_mse": 0.037267763167619705, "eval_r2": 0.4078657020062894, "eval_rmse": 0.1930485963821411, "eval_runtime": 67.6736, "eval_samples_per_second": 56.802, "eval_steps_per_second": 0.901, "learning_rate": 1.0000000000000002e-07, "step": 14661 }, { "epoch": 82.0, "eval_explained_variance": 0.408656867650839, "eval_loss": 0.35563620924949646, "eval_mae": 0.13087815046310425, "eval_mse": 0.03731405362486839, "eval_r2": 0.4071799006076709, "eval_rmse": 0.19316846132278442, "eval_runtime": 68.4549, "eval_samples_per_second": 56.154, "eval_steps_per_second": 0.891, "learning_rate": 1.0000000000000002e-07, "step": 14842 }, { "epoch": 82.87292817679558, "grad_norm": 0.20405510067939758, "learning_rate": 1.0000000000000002e-07, "loss": 0.3651, "step": 15000 }, { "epoch": 83.0, "eval_explained_variance": 0.41021374555734486, "eval_loss": 0.35571029782295227, "eval_mae": 0.13036301732063293, "eval_mse": 0.03731907904148102, "eval_r2": 0.4073602568430592, "eval_rmse": 0.19318147003650665, "eval_runtime": 68.1325, "eval_samples_per_second": 56.419, "eval_steps_per_second": 0.895, "learning_rate": 1.0000000000000002e-07, "step": 15023 }, { "epoch": 84.0, "eval_explained_variance": 0.4082063390658452, "eval_loss": 0.35581377148628235, "eval_mae": 0.1305844783782959, "eval_mse": 0.037393905222415924, "eval_r2": 0.4062799764902456, "eval_rmse": 0.19337503612041473, "eval_runtime": 66.3433, "eval_samples_per_second": 57.941, "eval_steps_per_second": 0.919, "learning_rate": 1.0000000000000002e-07, "step": 15204 }, { "epoch": 84.0, "learning_rate": 1.0000000000000002e-07, "step": 15204, "total_flos": 2.180798470217171e+19, "train_loss": 0.37467605181350044, "train_runtime": 24668.9414, "train_samples_per_second": 46.707, "train_steps_per_second": 0.734 } ], "logging_steps": 500, "max_steps": 18100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.180798470217171e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }