lombardata's picture
Evaluation on the test set completed on 2024_09_18.
f2e976c verified
raw
history blame contribute delete
No virus
43.5 kB
{
"best_metric": 0.35516515374183655,
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-large-2024_09_17-batch-size64_epochs100_freeze/checkpoint-13394",
"epoch": 84.0,
"eval_steps": 500,
"global_step": 15204,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_explained_variance": 0.28046968350043666,
"eval_loss": 0.38582414388656616,
"eval_mae": 0.15708860754966736,
"eval_mse": 0.04635250195860863,
"eval_r2": 0.26238919671070565,
"eval_rmse": 0.21529631316661835,
"eval_runtime": 68.5532,
"eval_samples_per_second": 56.073,
"eval_steps_per_second": 0.89,
"learning_rate": 0.001,
"step": 181
},
{
"epoch": 2.0,
"eval_explained_variance": 0.32091750548436093,
"eval_loss": 0.37635815143585205,
"eval_mae": 0.1467229723930359,
"eval_mse": 0.04398971050977707,
"eval_r2": 0.3120521114085856,
"eval_rmse": 0.20973724126815796,
"eval_runtime": 65.3616,
"eval_samples_per_second": 58.811,
"eval_steps_per_second": 0.933,
"learning_rate": 0.001,
"step": 362
},
{
"epoch": 2.7624309392265194,
"grad_norm": 0.29469817876815796,
"learning_rate": 0.001,
"loss": 0.4473,
"step": 500
},
{
"epoch": 3.0,
"eval_explained_variance": 0.3393913645010728,
"eval_loss": 0.3715941309928894,
"eval_mae": 0.1450481116771698,
"eval_mse": 0.04250793904066086,
"eval_r2": 0.33185449725151883,
"eval_rmse": 0.20617453753948212,
"eval_runtime": 65.4448,
"eval_samples_per_second": 58.737,
"eval_steps_per_second": 0.932,
"learning_rate": 0.001,
"step": 543
},
{
"epoch": 4.0,
"eval_explained_variance": 0.35663692767803484,
"eval_loss": 0.3672849237918854,
"eval_mae": 0.1395464986562729,
"eval_mse": 0.0409623458981514,
"eval_r2": 0.35477505001012255,
"eval_rmse": 0.20239156484603882,
"eval_runtime": 65.8223,
"eval_samples_per_second": 58.4,
"eval_steps_per_second": 0.927,
"learning_rate": 0.001,
"step": 724
},
{
"epoch": 5.0,
"eval_explained_variance": 0.3493932577279898,
"eval_loss": 0.3692065477371216,
"eval_mae": 0.1393202394247055,
"eval_mse": 0.041857048869132996,
"eval_r2": 0.3425061497286567,
"eval_rmse": 0.20458994805812836,
"eval_runtime": 66.3389,
"eval_samples_per_second": 57.945,
"eval_steps_per_second": 0.92,
"learning_rate": 0.001,
"step": 905
},
{
"epoch": 5.524861878453039,
"grad_norm": 0.19042304158210754,
"learning_rate": 0.001,
"loss": 0.3892,
"step": 1000
},
{
"epoch": 6.0,
"eval_explained_variance": 0.35904277287996733,
"eval_loss": 0.3672534227371216,
"eval_mae": 0.14119164645671844,
"eval_mse": 0.040877003222703934,
"eval_r2": 0.3553590945142445,
"eval_rmse": 0.2021806240081787,
"eval_runtime": 65.5836,
"eval_samples_per_second": 58.612,
"eval_steps_per_second": 0.93,
"learning_rate": 0.001,
"step": 1086
},
{
"epoch": 7.0,
"eval_explained_variance": 0.34988729311869693,
"eval_loss": 0.3680865168571472,
"eval_mae": 0.14079739153385162,
"eval_mse": 0.04153257608413696,
"eval_r2": 0.3456613343062778,
"eval_rmse": 0.2037954330444336,
"eval_runtime": 64.4017,
"eval_samples_per_second": 59.688,
"eval_steps_per_second": 0.947,
"learning_rate": 0.001,
"step": 1267
},
{
"epoch": 8.0,
"eval_explained_variance": 0.36423414945602417,
"eval_loss": 0.365603506565094,
"eval_mae": 0.13892073929309845,
"eval_mse": 0.04058730974793434,
"eval_r2": 0.35962535995096967,
"eval_rmse": 0.20146292448043823,
"eval_runtime": 64.831,
"eval_samples_per_second": 59.293,
"eval_steps_per_second": 0.941,
"learning_rate": 0.001,
"step": 1448
},
{
"epoch": 8.287292817679559,
"grad_norm": 0.1760077178478241,
"learning_rate": 0.001,
"loss": 0.3855,
"step": 1500
},
{
"epoch": 9.0,
"eval_explained_variance": 0.3612723258825449,
"eval_loss": 0.36585840582847595,
"eval_mae": 0.13438531756401062,
"eval_mse": 0.04076695442199707,
"eval_r2": 0.3554776353070419,
"eval_rmse": 0.20190827548503876,
"eval_runtime": 64.4706,
"eval_samples_per_second": 59.624,
"eval_steps_per_second": 0.946,
"learning_rate": 0.001,
"step": 1629
},
{
"epoch": 10.0,
"eval_explained_variance": 0.3561701728747441,
"eval_loss": 0.366574227809906,
"eval_mae": 0.13837845623493195,
"eval_mse": 0.04093795642256737,
"eval_r2": 0.3533183127533612,
"eval_rmse": 0.2023313045501709,
"eval_runtime": 63.2978,
"eval_samples_per_second": 60.729,
"eval_steps_per_second": 0.964,
"learning_rate": 0.001,
"step": 1810
},
{
"epoch": 11.0,
"eval_explained_variance": 0.3574172487625709,
"eval_loss": 0.36660775542259216,
"eval_mae": 0.13663478195667267,
"eval_mse": 0.04090488329529762,
"eval_r2": 0.35496352056496683,
"eval_rmse": 0.20224955677986145,
"eval_runtime": 66.2827,
"eval_samples_per_second": 57.994,
"eval_steps_per_second": 0.92,
"learning_rate": 0.001,
"step": 1991
},
{
"epoch": 11.049723756906078,
"grad_norm": 0.14891982078552246,
"learning_rate": 0.001,
"loss": 0.3816,
"step": 2000
},
{
"epoch": 12.0,
"eval_explained_variance": 0.3598099580177894,
"eval_loss": 0.36626219749450684,
"eval_mae": 0.13958622515201569,
"eval_mse": 0.04085636883974075,
"eval_r2": 0.35871773520484396,
"eval_rmse": 0.20212958753108978,
"eval_runtime": 64.7339,
"eval_samples_per_second": 59.382,
"eval_steps_per_second": 0.942,
"learning_rate": 0.001,
"step": 2172
},
{
"epoch": 13.0,
"eval_explained_variance": 0.37047534722548264,
"eval_loss": 0.3631901741027832,
"eval_mae": 0.1360856592655182,
"eval_mse": 0.03979066386818886,
"eval_r2": 0.3696611807758026,
"eval_rmse": 0.1994759738445282,
"eval_runtime": 65.3689,
"eval_samples_per_second": 58.805,
"eval_steps_per_second": 0.933,
"learning_rate": 0.001,
"step": 2353
},
{
"epoch": 13.812154696132596,
"grad_norm": 0.14235170185565948,
"learning_rate": 0.001,
"loss": 0.381,
"step": 2500
},
{
"epoch": 14.0,
"eval_explained_variance": 0.36284926304450404,
"eval_loss": 0.36694806814193726,
"eval_mae": 0.14229656755924225,
"eval_mse": 0.04098258540034294,
"eval_r2": 0.356153731540797,
"eval_rmse": 0.20244155824184418,
"eval_runtime": 64.126,
"eval_samples_per_second": 59.945,
"eval_steps_per_second": 0.951,
"learning_rate": 0.001,
"step": 2534
},
{
"epoch": 15.0,
"eval_explained_variance": 0.36449302159822905,
"eval_loss": 0.3644973933696747,
"eval_mae": 0.1395292580127716,
"eval_mse": 0.04036581516265869,
"eval_r2": 0.36203359510531696,
"eval_rmse": 0.2009124606847763,
"eval_runtime": 64.0305,
"eval_samples_per_second": 60.034,
"eval_steps_per_second": 0.953,
"learning_rate": 0.001,
"step": 2715
},
{
"epoch": 16.0,
"eval_explained_variance": 0.37152041838719296,
"eval_loss": 0.36393943428993225,
"eval_mae": 0.13569381833076477,
"eval_mse": 0.039987124502658844,
"eval_r2": 0.36948082804864185,
"eval_rmse": 0.19996780157089233,
"eval_runtime": 63.9139,
"eval_samples_per_second": 60.143,
"eval_steps_per_second": 0.954,
"learning_rate": 0.001,
"step": 2896
},
{
"epoch": 16.574585635359117,
"grad_norm": 0.13048891723155975,
"learning_rate": 0.001,
"loss": 0.3811,
"step": 3000
},
{
"epoch": 17.0,
"eval_explained_variance": 0.37284482900912946,
"eval_loss": 0.36665406823158264,
"eval_mae": 0.14128881692886353,
"eval_mse": 0.04064851254224777,
"eval_r2": 0.3621847777710853,
"eval_rmse": 0.20161476731300354,
"eval_runtime": 66.0408,
"eval_samples_per_second": 58.206,
"eval_steps_per_second": 0.924,
"learning_rate": 0.001,
"step": 3077
},
{
"epoch": 18.0,
"eval_explained_variance": 0.3705295782822829,
"eval_loss": 0.36318618059158325,
"eval_mae": 0.13683417439460754,
"eval_mse": 0.03981361910700798,
"eval_r2": 0.369508628045091,
"eval_rmse": 0.19953350722789764,
"eval_runtime": 63.7575,
"eval_samples_per_second": 60.291,
"eval_steps_per_second": 0.957,
"learning_rate": 0.001,
"step": 3258
},
{
"epoch": 19.0,
"eval_explained_variance": 0.3733598177249615,
"eval_loss": 0.36302879452705383,
"eval_mae": 0.13539017736911774,
"eval_mse": 0.03974781930446625,
"eval_r2": 0.3718927441003872,
"eval_rmse": 0.19936855137348175,
"eval_runtime": 63.4414,
"eval_samples_per_second": 60.591,
"eval_steps_per_second": 0.962,
"learning_rate": 0.001,
"step": 3439
},
{
"epoch": 19.337016574585636,
"grad_norm": 0.13633792102336884,
"learning_rate": 0.001,
"loss": 0.3792,
"step": 3500
},
{
"epoch": 20.0,
"eval_explained_variance": 0.3622324833503136,
"eval_loss": 0.36489424109458923,
"eval_mae": 0.13486731052398682,
"eval_mse": 0.04052112251520157,
"eval_r2": 0.35869592759647334,
"eval_rmse": 0.20129859447479248,
"eval_runtime": 64.295,
"eval_samples_per_second": 59.787,
"eval_steps_per_second": 0.949,
"learning_rate": 0.001,
"step": 3620
},
{
"epoch": 21.0,
"eval_explained_variance": 0.3630923858055702,
"eval_loss": 0.3665030300617218,
"eval_mae": 0.13610774278640747,
"eval_mse": 0.040700096637010574,
"eval_r2": 0.3584834523421166,
"eval_rmse": 0.20174264907836914,
"eval_runtime": 64.1739,
"eval_samples_per_second": 59.9,
"eval_steps_per_second": 0.951,
"learning_rate": 0.001,
"step": 3801
},
{
"epoch": 22.0,
"eval_explained_variance": 0.3704591485170218,
"eval_loss": 0.3647814095020294,
"eval_mae": 0.1368531733751297,
"eval_mse": 0.03999844938516617,
"eval_r2": 0.3677615209740873,
"eval_rmse": 0.19999612867832184,
"eval_runtime": 63.5961,
"eval_samples_per_second": 60.444,
"eval_steps_per_second": 0.959,
"learning_rate": 0.001,
"step": 3982
},
{
"epoch": 22.099447513812155,
"grad_norm": 0.1797100454568863,
"learning_rate": 0.001,
"loss": 0.3808,
"step": 4000
},
{
"epoch": 23.0,
"eval_explained_variance": 0.3736427976534917,
"eval_loss": 0.3633384704589844,
"eval_mae": 0.1356455683708191,
"eval_mse": 0.039849139750003815,
"eval_r2": 0.37049292256309013,
"eval_rmse": 0.1996224969625473,
"eval_runtime": 63.5905,
"eval_samples_per_second": 60.449,
"eval_steps_per_second": 0.959,
"learning_rate": 0.001,
"step": 4163
},
{
"epoch": 24.0,
"eval_explained_variance": 0.3761314291220445,
"eval_loss": 0.3632254898548126,
"eval_mae": 0.13934393227100372,
"eval_mse": 0.03965350612998009,
"eval_r2": 0.3725149173190659,
"eval_rmse": 0.19913187623023987,
"eval_runtime": 63.5074,
"eval_samples_per_second": 60.528,
"eval_steps_per_second": 0.961,
"learning_rate": 0.001,
"step": 4344
},
{
"epoch": 24.861878453038674,
"grad_norm": 0.10225138068199158,
"learning_rate": 0.001,
"loss": 0.3796,
"step": 4500
},
{
"epoch": 25.0,
"eval_explained_variance": 0.37342833555661714,
"eval_loss": 0.3638208210468292,
"eval_mae": 0.13812901079654694,
"eval_mse": 0.03988226130604744,
"eval_r2": 0.3698107462777432,
"eval_rmse": 0.19970543682575226,
"eval_runtime": 64.082,
"eval_samples_per_second": 59.986,
"eval_steps_per_second": 0.952,
"learning_rate": 0.001,
"step": 4525
},
{
"epoch": 26.0,
"eval_explained_variance": 0.38356072627581084,
"eval_loss": 0.3607248365879059,
"eval_mae": 0.132920041680336,
"eval_mse": 0.03901772201061249,
"eval_r2": 0.3818014601715421,
"eval_rmse": 0.19752904772758484,
"eval_runtime": 63.8273,
"eval_samples_per_second": 60.225,
"eval_steps_per_second": 0.956,
"learning_rate": 0.0001,
"step": 4706
},
{
"epoch": 27.0,
"eval_explained_variance": 0.3877932016666119,
"eval_loss": 0.3599555194377899,
"eval_mae": 0.13530299067497253,
"eval_mse": 0.038680098950862885,
"eval_r2": 0.3862897971569748,
"eval_rmse": 0.19667257368564606,
"eval_runtime": 63.6171,
"eval_samples_per_second": 60.424,
"eval_steps_per_second": 0.959,
"learning_rate": 0.0001,
"step": 4887
},
{
"epoch": 27.624309392265193,
"grad_norm": 0.09920254349708557,
"learning_rate": 0.0001,
"loss": 0.3765,
"step": 5000
},
{
"epoch": 28.0,
"eval_explained_variance": 0.39040088195067185,
"eval_loss": 0.35923057794570923,
"eval_mae": 0.13371111452579498,
"eval_mse": 0.038444750010967255,
"eval_r2": 0.3893828319749203,
"eval_rmse": 0.19607332348823547,
"eval_runtime": 63.6463,
"eval_samples_per_second": 60.396,
"eval_steps_per_second": 0.958,
"learning_rate": 0.0001,
"step": 5068
},
{
"epoch": 29.0,
"eval_explained_variance": 0.39147963432165295,
"eval_loss": 0.3595493733882904,
"eval_mae": 0.13497120141983032,
"eval_mse": 0.03846590965986252,
"eval_r2": 0.3891551349793923,
"eval_rmse": 0.1961272805929184,
"eval_runtime": 63.7787,
"eval_samples_per_second": 60.271,
"eval_steps_per_second": 0.956,
"learning_rate": 0.0001,
"step": 5249
},
{
"epoch": 30.0,
"eval_explained_variance": 0.3893452011621915,
"eval_loss": 0.35978832840919495,
"eval_mae": 0.13498304784297943,
"eval_mse": 0.03862994909286499,
"eval_r2": 0.3876274861623127,
"eval_rmse": 0.19654503464698792,
"eval_runtime": 63.918,
"eval_samples_per_second": 60.14,
"eval_steps_per_second": 0.954,
"learning_rate": 0.0001,
"step": 5430
},
{
"epoch": 30.386740331491712,
"grad_norm": 0.09680859744548798,
"learning_rate": 0.0001,
"loss": 0.373,
"step": 5500
},
{
"epoch": 31.0,
"eval_explained_variance": 0.39206390655957735,
"eval_loss": 0.35871124267578125,
"eval_mae": 0.131711944937706,
"eval_mse": 0.03838532418012619,
"eval_r2": 0.39069464683386806,
"eval_rmse": 0.19592173397541046,
"eval_runtime": 63.6009,
"eval_samples_per_second": 60.439,
"eval_steps_per_second": 0.959,
"learning_rate": 0.0001,
"step": 5611
},
{
"epoch": 32.0,
"eval_explained_variance": 0.39324428943487316,
"eval_loss": 0.35840144753456116,
"eval_mae": 0.13263028860092163,
"eval_mse": 0.0382704883813858,
"eval_r2": 0.39277553504116497,
"eval_rmse": 0.19562844932079315,
"eval_runtime": 63.4174,
"eval_samples_per_second": 60.614,
"eval_steps_per_second": 0.962,
"learning_rate": 0.0001,
"step": 5792
},
{
"epoch": 33.0,
"eval_explained_variance": 0.3953018326025743,
"eval_loss": 0.35809990763664246,
"eval_mae": 0.13110357522964478,
"eval_mse": 0.03812328726053238,
"eval_r2": 0.39453575095056653,
"eval_rmse": 0.19525185227394104,
"eval_runtime": 62.9848,
"eval_samples_per_second": 61.031,
"eval_steps_per_second": 0.968,
"learning_rate": 0.0001,
"step": 5973
},
{
"epoch": 33.149171270718234,
"grad_norm": 0.10557221621274948,
"learning_rate": 0.0001,
"loss": 0.3735,
"step": 6000
},
{
"epoch": 34.0,
"eval_explained_variance": 0.3966822119859549,
"eval_loss": 0.3580343723297119,
"eval_mae": 0.13232208788394928,
"eval_mse": 0.038077060133218765,
"eval_r2": 0.3953078482977419,
"eval_rmse": 0.19513344764709473,
"eval_runtime": 63.9448,
"eval_samples_per_second": 60.114,
"eval_steps_per_second": 0.954,
"learning_rate": 0.0001,
"step": 6154
},
{
"epoch": 35.0,
"eval_explained_variance": 0.39542460441589355,
"eval_loss": 0.3578670918941498,
"eval_mae": 0.13223391771316528,
"eval_mse": 0.038055673241615295,
"eval_r2": 0.3949423136793632,
"eval_rmse": 0.19507862627506256,
"eval_runtime": 62.8884,
"eval_samples_per_second": 61.124,
"eval_steps_per_second": 0.97,
"learning_rate": 0.0001,
"step": 6335
},
{
"epoch": 35.91160220994475,
"grad_norm": 0.11413700878620148,
"learning_rate": 0.0001,
"loss": 0.3711,
"step": 6500
},
{
"epoch": 36.0,
"eval_explained_variance": 0.38986305548594546,
"eval_loss": 0.35921958088874817,
"eval_mae": 0.13451573252677917,
"eval_mse": 0.0385238379240036,
"eval_r2": 0.3895210802446932,
"eval_rmse": 0.19627490639686584,
"eval_runtime": 63.9244,
"eval_samples_per_second": 60.134,
"eval_steps_per_second": 0.954,
"learning_rate": 0.0001,
"step": 6516
},
{
"epoch": 37.0,
"eval_explained_variance": 0.39700071628277117,
"eval_loss": 0.35754600167274475,
"eval_mae": 0.13133254647254944,
"eval_mse": 0.037971220910549164,
"eval_r2": 0.3965857587136563,
"eval_rmse": 0.19486205279827118,
"eval_runtime": 63.3201,
"eval_samples_per_second": 60.707,
"eval_steps_per_second": 0.963,
"learning_rate": 0.0001,
"step": 6697
},
{
"epoch": 38.0,
"eval_explained_variance": 0.39355502220300526,
"eval_loss": 0.35816583037376404,
"eval_mae": 0.13258841633796692,
"eval_mse": 0.038258858025074005,
"eval_r2": 0.39226546341596713,
"eval_rmse": 0.19559872150421143,
"eval_runtime": 62.6934,
"eval_samples_per_second": 61.314,
"eval_steps_per_second": 0.973,
"learning_rate": 0.0001,
"step": 6878
},
{
"epoch": 38.67403314917127,
"grad_norm": 0.147694930434227,
"learning_rate": 0.0001,
"loss": 0.3705,
"step": 7000
},
{
"epoch": 39.0,
"eval_explained_variance": 0.3965281844139099,
"eval_loss": 0.3575587570667267,
"eval_mae": 0.1313440054655075,
"eval_mse": 0.03796360641717911,
"eval_r2": 0.39630358388937376,
"eval_rmse": 0.19484251737594604,
"eval_runtime": 62.5891,
"eval_samples_per_second": 61.416,
"eval_steps_per_second": 0.975,
"learning_rate": 0.0001,
"step": 7059
},
{
"epoch": 40.0,
"eval_explained_variance": 0.399988224873176,
"eval_loss": 0.3574675917625427,
"eval_mae": 0.13325949013233185,
"eval_mse": 0.03790339455008507,
"eval_r2": 0.3980004685467563,
"eval_rmse": 0.19468794763088226,
"eval_runtime": 63.1438,
"eval_samples_per_second": 60.877,
"eval_steps_per_second": 0.966,
"learning_rate": 0.0001,
"step": 7240
},
{
"epoch": 41.0,
"eval_explained_variance": 0.39883482914704543,
"eval_loss": 0.35797080397605896,
"eval_mae": 0.13172872364521027,
"eval_mse": 0.03810995817184448,
"eval_r2": 0.3955525420135218,
"eval_rmse": 0.19521771371364594,
"eval_runtime": 63.8253,
"eval_samples_per_second": 60.227,
"eval_steps_per_second": 0.956,
"learning_rate": 0.0001,
"step": 7421
},
{
"epoch": 41.43646408839779,
"grad_norm": 0.13456250727176666,
"learning_rate": 0.0001,
"loss": 0.3704,
"step": 7500
},
{
"epoch": 42.0,
"eval_explained_variance": 0.39858559003243077,
"eval_loss": 0.3574862778186798,
"eval_mae": 0.13303333520889282,
"eval_mse": 0.03798728436231613,
"eval_r2": 0.39695276377811434,
"eval_rmse": 0.19490326941013336,
"eval_runtime": 67.394,
"eval_samples_per_second": 57.038,
"eval_steps_per_second": 0.905,
"learning_rate": 0.0001,
"step": 7602
},
{
"epoch": 43.0,
"eval_explained_variance": 0.40196093229147106,
"eval_loss": 0.3568632900714874,
"eval_mae": 0.13252291083335876,
"eval_mse": 0.03772151470184326,
"eval_r2": 0.4008098201061217,
"eval_rmse": 0.19422027468681335,
"eval_runtime": 64.6291,
"eval_samples_per_second": 59.478,
"eval_steps_per_second": 0.944,
"learning_rate": 0.0001,
"step": 7783
},
{
"epoch": 44.0,
"eval_explained_variance": 0.4026290269998404,
"eval_loss": 0.35680440068244934,
"eval_mae": 0.13054220378398895,
"eval_mse": 0.03770707920193672,
"eval_r2": 0.4009435040202465,
"eval_rmse": 0.1941831111907959,
"eval_runtime": 64.3612,
"eval_samples_per_second": 59.725,
"eval_steps_per_second": 0.948,
"learning_rate": 0.0001,
"step": 7964
},
{
"epoch": 44.19889502762431,
"grad_norm": 0.12347038835287094,
"learning_rate": 0.0001,
"loss": 0.3695,
"step": 8000
},
{
"epoch": 45.0,
"eval_explained_variance": 0.40327414182516247,
"eval_loss": 0.35672253370285034,
"eval_mae": 0.13190330564975739,
"eval_mse": 0.03762032091617584,
"eval_r2": 0.40209036636711937,
"eval_rmse": 0.193959578871727,
"eval_runtime": 63.5564,
"eval_samples_per_second": 60.482,
"eval_steps_per_second": 0.96,
"learning_rate": 0.0001,
"step": 8145
},
{
"epoch": 46.0,
"eval_explained_variance": 0.4014772314291734,
"eval_loss": 0.35691043734550476,
"eval_mae": 0.1298011690378189,
"eval_mse": 0.03774061053991318,
"eval_r2": 0.39979262898816803,
"eval_rmse": 0.19426943361759186,
"eval_runtime": 63.5835,
"eval_samples_per_second": 60.456,
"eval_steps_per_second": 0.959,
"learning_rate": 0.0001,
"step": 8326
},
{
"epoch": 46.96132596685083,
"grad_norm": 0.1476801335811615,
"learning_rate": 0.0001,
"loss": 0.369,
"step": 8500
},
{
"epoch": 47.0,
"eval_explained_variance": 0.39959606299033534,
"eval_loss": 0.3573501706123352,
"eval_mae": 0.12922033667564392,
"eval_mse": 0.03795965388417244,
"eval_r2": 0.39734844502667516,
"eval_rmse": 0.19483236968517303,
"eval_runtime": 64.1983,
"eval_samples_per_second": 59.877,
"eval_steps_per_second": 0.95,
"learning_rate": 0.0001,
"step": 8507
},
{
"epoch": 48.0,
"eval_explained_variance": 0.404104429941911,
"eval_loss": 0.35634738206863403,
"eval_mae": 0.13015295565128326,
"eval_mse": 0.03764864429831505,
"eval_r2": 0.4019054071784941,
"eval_rmse": 0.19403257966041565,
"eval_runtime": 63.8043,
"eval_samples_per_second": 60.247,
"eval_steps_per_second": 0.956,
"learning_rate": 0.0001,
"step": 8688
},
{
"epoch": 49.0,
"eval_explained_variance": 0.4024105530518752,
"eval_loss": 0.3566192090511322,
"eval_mae": 0.1305515021085739,
"eval_mse": 0.03765449672937393,
"eval_r2": 0.40112185894390806,
"eval_rmse": 0.19404765963554382,
"eval_runtime": 65.5486,
"eval_samples_per_second": 58.644,
"eval_steps_per_second": 0.931,
"learning_rate": 0.0001,
"step": 8869
},
{
"epoch": 49.72375690607735,
"grad_norm": 0.17585940659046173,
"learning_rate": 0.0001,
"loss": 0.3691,
"step": 9000
},
{
"epoch": 50.0,
"eval_explained_variance": 0.40147255475704485,
"eval_loss": 0.3571104109287262,
"eval_mae": 0.13218748569488525,
"eval_mse": 0.0377979539334774,
"eval_r2": 0.39978904676068683,
"eval_rmse": 0.19441695511341095,
"eval_runtime": 64.6683,
"eval_samples_per_second": 59.442,
"eval_steps_per_second": 0.943,
"learning_rate": 0.0001,
"step": 9050
},
{
"epoch": 51.0,
"eval_explained_variance": 0.4020539063673753,
"eval_loss": 0.3584417402744293,
"eval_mae": 0.13350461423397064,
"eval_mse": 0.03811892494559288,
"eval_r2": 0.39583579837070054,
"eval_rmse": 0.19524069130420685,
"eval_runtime": 64.7621,
"eval_samples_per_second": 59.356,
"eval_steps_per_second": 0.942,
"learning_rate": 0.0001,
"step": 9231
},
{
"epoch": 52.0,
"eval_explained_variance": 0.4045378336539635,
"eval_loss": 0.3561328649520874,
"eval_mae": 0.1308905929327011,
"eval_mse": 0.03748491033911705,
"eval_r2": 0.4042346756357482,
"eval_rmse": 0.19361020624637604,
"eval_runtime": 64.77,
"eval_samples_per_second": 59.349,
"eval_steps_per_second": 0.942,
"learning_rate": 0.0001,
"step": 9412
},
{
"epoch": 52.48618784530387,
"grad_norm": 0.15689648687839508,
"learning_rate": 0.0001,
"loss": 0.3677,
"step": 9500
},
{
"epoch": 53.0,
"eval_explained_variance": 0.4053275997822101,
"eval_loss": 0.35652926564216614,
"eval_mae": 0.13147617876529694,
"eval_mse": 0.03759394586086273,
"eval_r2": 0.4026062075156075,
"eval_rmse": 0.19389158487319946,
"eval_runtime": 64.8021,
"eval_samples_per_second": 59.319,
"eval_steps_per_second": 0.941,
"learning_rate": 0.0001,
"step": 9593
},
{
"epoch": 54.0,
"eval_explained_variance": 0.401798074062054,
"eval_loss": 0.3567388355731964,
"eval_mae": 0.13164331018924713,
"eval_mse": 0.03773793205618858,
"eval_r2": 0.40105556644385676,
"eval_rmse": 0.1942625343799591,
"eval_runtime": 65.4024,
"eval_samples_per_second": 58.775,
"eval_steps_per_second": 0.933,
"learning_rate": 0.0001,
"step": 9774
},
{
"epoch": 55.0,
"eval_explained_variance": 0.40524112719755906,
"eval_loss": 0.35645580291748047,
"eval_mae": 0.1291799694299698,
"eval_mse": 0.03761202096939087,
"eval_r2": 0.40258003846192925,
"eval_rmse": 0.19393819570541382,
"eval_runtime": 65.1148,
"eval_samples_per_second": 59.034,
"eval_steps_per_second": 0.937,
"learning_rate": 0.0001,
"step": 9955
},
{
"epoch": 55.248618784530386,
"grad_norm": 0.14432880282402039,
"learning_rate": 0.0001,
"loss": 0.3684,
"step": 10000
},
{
"epoch": 56.0,
"eval_explained_variance": 0.40458508179737973,
"eval_loss": 0.35665351152420044,
"eval_mae": 0.12790292501449585,
"eval_mse": 0.03767779469490051,
"eval_r2": 0.40173746899832624,
"eval_rmse": 0.19410768151283264,
"eval_runtime": 64.7859,
"eval_samples_per_second": 59.334,
"eval_steps_per_second": 0.942,
"learning_rate": 0.0001,
"step": 10136
},
{
"epoch": 57.0,
"eval_explained_variance": 0.40489131670731765,
"eval_loss": 0.35622259974479675,
"eval_mae": 0.12940338253974915,
"eval_mse": 0.03757502883672714,
"eval_r2": 0.40317412718530543,
"eval_rmse": 0.1938427984714508,
"eval_runtime": 64.354,
"eval_samples_per_second": 59.732,
"eval_steps_per_second": 0.948,
"learning_rate": 0.0001,
"step": 10317
},
{
"epoch": 58.0,
"eval_explained_variance": 0.40618401765823364,
"eval_loss": 0.35649776458740234,
"eval_mae": 0.12992320954799652,
"eval_mse": 0.03755363076925278,
"eval_r2": 0.40359610267984325,
"eval_rmse": 0.1937875896692276,
"eval_runtime": 63.5875,
"eval_samples_per_second": 60.452,
"eval_steps_per_second": 0.959,
"learning_rate": 0.0001,
"step": 10498
},
{
"epoch": 58.011049723756905,
"grad_norm": 0.17977654933929443,
"learning_rate": 1e-05,
"loss": 0.368,
"step": 10500
},
{
"epoch": 59.0,
"eval_explained_variance": 0.40612818186099714,
"eval_loss": 0.3559414744377136,
"eval_mae": 0.1292232871055603,
"eval_mse": 0.037484604865312576,
"eval_r2": 0.404684355302516,
"eval_rmse": 0.19360941648483276,
"eval_runtime": 63.0292,
"eval_samples_per_second": 60.988,
"eval_steps_per_second": 0.968,
"learning_rate": 1e-05,
"step": 10679
},
{
"epoch": 60.0,
"eval_explained_variance": 0.4082453021636376,
"eval_loss": 0.35587525367736816,
"eval_mae": 0.1295480728149414,
"eval_mse": 0.03739844262599945,
"eval_r2": 0.40598491760734956,
"eval_rmse": 0.1933867633342743,
"eval_runtime": 67.1089,
"eval_samples_per_second": 57.28,
"eval_steps_per_second": 0.909,
"learning_rate": 1e-05,
"step": 10860
},
{
"epoch": 60.773480662983424,
"grad_norm": 0.1965423822402954,
"learning_rate": 1e-05,
"loss": 0.3664,
"step": 11000
},
{
"epoch": 61.0,
"eval_explained_variance": 0.4074813173367427,
"eval_loss": 0.35549554228782654,
"eval_mae": 0.13036619126796722,
"eval_mse": 0.03731352090835571,
"eval_r2": 0.40719759569271147,
"eval_rmse": 0.1931670755147934,
"eval_runtime": 62.4919,
"eval_samples_per_second": 61.512,
"eval_steps_per_second": 0.976,
"learning_rate": 1e-05,
"step": 11041
},
{
"epoch": 62.0,
"eval_explained_variance": 0.4057550017650311,
"eval_loss": 0.3564907908439636,
"eval_mae": 0.13166674971580505,
"eval_mse": 0.03761378303170204,
"eval_r2": 0.4036480162510964,
"eval_rmse": 0.19394272565841675,
"eval_runtime": 64.0633,
"eval_samples_per_second": 60.003,
"eval_steps_per_second": 0.952,
"learning_rate": 1e-05,
"step": 11222
},
{
"epoch": 63.0,
"eval_explained_variance": 0.4086620624248798,
"eval_loss": 0.35556313395500183,
"eval_mae": 0.12934741377830505,
"eval_mse": 0.03726600110530853,
"eval_r2": 0.40751167332410276,
"eval_rmse": 0.1930440366268158,
"eval_runtime": 63.2366,
"eval_samples_per_second": 60.788,
"eval_steps_per_second": 0.965,
"learning_rate": 1e-05,
"step": 11403
},
{
"epoch": 63.53591160220994,
"grad_norm": 0.1525866687297821,
"learning_rate": 1e-05,
"loss": 0.366,
"step": 11500
},
{
"epoch": 64.0,
"eval_explained_variance": 0.40886356280400205,
"eval_loss": 0.35541364550590515,
"eval_mae": 0.1295996755361557,
"eval_mse": 0.03727412968873978,
"eval_r2": 0.40770017250386054,
"eval_rmse": 0.1930650919675827,
"eval_runtime": 63.8539,
"eval_samples_per_second": 60.2,
"eval_steps_per_second": 0.955,
"learning_rate": 1e-05,
"step": 11584
},
{
"epoch": 65.0,
"eval_explained_variance": 0.40589494430101836,
"eval_loss": 0.35602322220802307,
"eval_mae": 0.13072702288627625,
"eval_mse": 0.03753972053527832,
"eval_r2": 0.4048648390836954,
"eval_rmse": 0.19375169277191162,
"eval_runtime": 63.5254,
"eval_samples_per_second": 60.511,
"eval_steps_per_second": 0.96,
"learning_rate": 1e-05,
"step": 11765
},
{
"epoch": 66.0,
"eval_explained_variance": 0.4085214688227727,
"eval_loss": 0.35534363985061646,
"eval_mae": 0.13003438711166382,
"eval_mse": 0.03723596781492233,
"eval_r2": 0.40801214840672984,
"eval_rmse": 0.19296623766422272,
"eval_runtime": 66.0061,
"eval_samples_per_second": 58.237,
"eval_steps_per_second": 0.924,
"learning_rate": 1e-05,
"step": 11946
},
{
"epoch": 66.29834254143647,
"grad_norm": 0.18801870942115784,
"learning_rate": 1e-05,
"loss": 0.3654,
"step": 12000
},
{
"epoch": 67.0,
"eval_explained_variance": 0.4081741479726938,
"eval_loss": 0.3554227948188782,
"eval_mae": 0.12988974153995514,
"eval_mse": 0.03726029023528099,
"eval_r2": 0.4077790726698564,
"eval_rmse": 0.1930292397737503,
"eval_runtime": 65.2859,
"eval_samples_per_second": 58.879,
"eval_steps_per_second": 0.934,
"learning_rate": 1e-05,
"step": 12127
},
{
"epoch": 68.0,
"eval_explained_variance": 0.4073961698091947,
"eval_loss": 0.35557952523231506,
"eval_mae": 0.13015064597129822,
"eval_mse": 0.03740492835640907,
"eval_r2": 0.4058588832439236,
"eval_rmse": 0.19340354204177856,
"eval_runtime": 65.1267,
"eval_samples_per_second": 59.023,
"eval_steps_per_second": 0.937,
"learning_rate": 1e-05,
"step": 12308
},
{
"epoch": 69.0,
"eval_explained_variance": 0.4085943423784696,
"eval_loss": 0.3553701937198639,
"eval_mae": 0.12976409494876862,
"eval_mse": 0.03725024312734604,
"eval_r2": 0.40825238050595736,
"eval_rmse": 0.19300322234630585,
"eval_runtime": 64.7301,
"eval_samples_per_second": 59.385,
"eval_steps_per_second": 0.942,
"learning_rate": 1e-05,
"step": 12489
},
{
"epoch": 69.06077348066299,
"grad_norm": 0.15430860221385956,
"learning_rate": 1e-05,
"loss": 0.3658,
"step": 12500
},
{
"epoch": 70.0,
"eval_explained_variance": 0.4094207286834717,
"eval_loss": 0.35594871640205383,
"eval_mae": 0.13069316744804382,
"eval_mse": 0.03737233206629753,
"eval_r2": 0.40659481251933094,
"eval_rmse": 0.19331924617290497,
"eval_runtime": 66.4386,
"eval_samples_per_second": 57.858,
"eval_steps_per_second": 0.918,
"learning_rate": 1e-05,
"step": 12670
},
{
"epoch": 71.0,
"eval_explained_variance": 0.40725430158468395,
"eval_loss": 0.35573798418045044,
"eval_mae": 0.1295761913061142,
"eval_mse": 0.037380401045084,
"eval_r2": 0.40697699949296745,
"eval_rmse": 0.19334012269973755,
"eval_runtime": 65.624,
"eval_samples_per_second": 58.576,
"eval_steps_per_second": 0.93,
"learning_rate": 1e-05,
"step": 12851
},
{
"epoch": 71.8232044198895,
"grad_norm": 0.35482099652290344,
"learning_rate": 1e-05,
"loss": 0.366,
"step": 13000
},
{
"epoch": 72.0,
"eval_explained_variance": 0.40842239214823794,
"eval_loss": 0.35571375489234924,
"eval_mae": 0.13028408586978912,
"eval_mse": 0.03734128177165985,
"eval_r2": 0.40698361470433536,
"eval_rmse": 0.19323892891407013,
"eval_runtime": 64.0529,
"eval_samples_per_second": 60.013,
"eval_steps_per_second": 0.952,
"learning_rate": 1e-05,
"step": 13032
},
{
"epoch": 73.0,
"eval_explained_variance": 0.4089708603345431,
"eval_loss": 0.3552262485027313,
"eval_mae": 0.12985268235206604,
"eval_mse": 0.037223465740680695,
"eval_r2": 0.408222457948687,
"eval_rmse": 0.1929338425397873,
"eval_runtime": 65.5971,
"eval_samples_per_second": 58.6,
"eval_steps_per_second": 0.93,
"learning_rate": 1.0000000000000002e-06,
"step": 13213
},
{
"epoch": 74.0,
"eval_explained_variance": 0.40937405824661255,
"eval_loss": 0.35516515374183655,
"eval_mae": 0.1281428188085556,
"eval_mse": 0.03721009939908981,
"eval_r2": 0.4087432799234766,
"eval_rmse": 0.1928991973400116,
"eval_runtime": 63.5094,
"eval_samples_per_second": 60.526,
"eval_steps_per_second": 0.96,
"learning_rate": 1.0000000000000002e-06,
"step": 13394
},
{
"epoch": 74.58563535911603,
"grad_norm": 0.20831693708896637,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.3654,
"step": 13500
},
{
"epoch": 75.0,
"eval_explained_variance": 0.40568819871315587,
"eval_loss": 0.3558255434036255,
"eval_mae": 0.13025221228599548,
"eval_mse": 0.037474822252988815,
"eval_r2": 0.40474793306670837,
"eval_rmse": 0.193584144115448,
"eval_runtime": 63.853,
"eval_samples_per_second": 60.201,
"eval_steps_per_second": 0.955,
"learning_rate": 1.0000000000000002e-06,
"step": 13575
},
{
"epoch": 76.0,
"eval_explained_variance": 0.408390985085414,
"eval_loss": 0.3555220663547516,
"eval_mae": 0.12769028544425964,
"eval_mse": 0.03735670447349548,
"eval_r2": 0.40610327648301114,
"eval_rmse": 0.19327881932258606,
"eval_runtime": 66.3493,
"eval_samples_per_second": 57.936,
"eval_steps_per_second": 0.919,
"learning_rate": 1.0000000000000002e-06,
"step": 13756
},
{
"epoch": 77.0,
"eval_explained_variance": 0.4046147374006418,
"eval_loss": 0.35615718364715576,
"eval_mae": 0.13205072283744812,
"eval_mse": 0.037551261484622955,
"eval_r2": 0.4042150129069256,
"eval_rmse": 0.19378148019313812,
"eval_runtime": 65.1729,
"eval_samples_per_second": 58.982,
"eval_steps_per_second": 0.936,
"learning_rate": 1.0000000000000002e-06,
"step": 13937
},
{
"epoch": 77.34806629834254,
"grad_norm": 0.20255261659622192,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.3663,
"step": 14000
},
{
"epoch": 78.0,
"eval_explained_variance": 0.4090478007610028,
"eval_loss": 0.35527750849723816,
"eval_mae": 0.13062655925750732,
"eval_mse": 0.037214502692222595,
"eval_r2": 0.4086604768416133,
"eval_rmse": 0.19291061162948608,
"eval_runtime": 66.5281,
"eval_samples_per_second": 57.78,
"eval_steps_per_second": 0.917,
"learning_rate": 1.0000000000000002e-06,
"step": 14118
},
{
"epoch": 79.0,
"eval_explained_variance": 0.4019758334526649,
"eval_loss": 0.3569395840167999,
"eval_mae": 0.13103225827217102,
"eval_mse": 0.037889137864112854,
"eval_r2": 0.3999096598660514,
"eval_rmse": 0.19465132057666779,
"eval_runtime": 65.9236,
"eval_samples_per_second": 58.31,
"eval_steps_per_second": 0.925,
"learning_rate": 1.0000000000000002e-06,
"step": 14299
},
{
"epoch": 80.0,
"eval_explained_variance": 0.4057845427439763,
"eval_loss": 0.35627198219299316,
"eval_mae": 0.13107524812221527,
"eval_mse": 0.037464920431375504,
"eval_r2": 0.40523034358958093,
"eval_rmse": 0.19355857372283936,
"eval_runtime": 66.5566,
"eval_samples_per_second": 57.755,
"eval_steps_per_second": 0.917,
"learning_rate": 1.0000000000000002e-06,
"step": 14480
},
{
"epoch": 80.11049723756906,
"grad_norm": 0.18743179738521576,
"learning_rate": 1.0000000000000002e-07,
"loss": 0.3655,
"step": 14500
},
{
"epoch": 81.0,
"eval_explained_variance": 0.4091951067631061,
"eval_loss": 0.3555302619934082,
"eval_mae": 0.13077440857887268,
"eval_mse": 0.037267763167619705,
"eval_r2": 0.4078657020062894,
"eval_rmse": 0.1930485963821411,
"eval_runtime": 67.6736,
"eval_samples_per_second": 56.802,
"eval_steps_per_second": 0.901,
"learning_rate": 1.0000000000000002e-07,
"step": 14661
},
{
"epoch": 82.0,
"eval_explained_variance": 0.408656867650839,
"eval_loss": 0.35563620924949646,
"eval_mae": 0.13087815046310425,
"eval_mse": 0.03731405362486839,
"eval_r2": 0.4071799006076709,
"eval_rmse": 0.19316846132278442,
"eval_runtime": 68.4549,
"eval_samples_per_second": 56.154,
"eval_steps_per_second": 0.891,
"learning_rate": 1.0000000000000002e-07,
"step": 14842
},
{
"epoch": 82.87292817679558,
"grad_norm": 0.20405510067939758,
"learning_rate": 1.0000000000000002e-07,
"loss": 0.3651,
"step": 15000
},
{
"epoch": 83.0,
"eval_explained_variance": 0.41021374555734486,
"eval_loss": 0.35571029782295227,
"eval_mae": 0.13036301732063293,
"eval_mse": 0.03731907904148102,
"eval_r2": 0.4073602568430592,
"eval_rmse": 0.19318147003650665,
"eval_runtime": 68.1325,
"eval_samples_per_second": 56.419,
"eval_steps_per_second": 0.895,
"learning_rate": 1.0000000000000002e-07,
"step": 15023
},
{
"epoch": 84.0,
"eval_explained_variance": 0.4082063390658452,
"eval_loss": 0.35581377148628235,
"eval_mae": 0.1305844783782959,
"eval_mse": 0.037393905222415924,
"eval_r2": 0.4062799764902456,
"eval_rmse": 0.19337503612041473,
"eval_runtime": 66.3433,
"eval_samples_per_second": 57.941,
"eval_steps_per_second": 0.919,
"learning_rate": 1.0000000000000002e-07,
"step": 15204
},
{
"epoch": 84.0,
"learning_rate": 1.0000000000000002e-07,
"step": 15204,
"total_flos": 2.180798470217171e+19,
"train_loss": 0.37467605181350044,
"train_runtime": 24668.9414,
"train_samples_per_second": 46.707,
"train_steps_per_second": 0.734
}
],
"logging_steps": 500,
"max_steps": 18100,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.180798470217171e+19,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}