{ "best_metric": 0.3337731659412384, "best_model_checkpoint": "/home1/datawork/mcontini/models/multilabel/huggingface/drone/drone-DinoVdeau-large-2024_09_16-batch-size64_epochs100_freeze/checkpoint-12060", "epoch": 77.0, "eval_steps": 500, "global_step": 13860, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_explained_variance": 0.2541693607966105, "eval_loss": 0.36081835627555847, "eval_mae": 0.1465730369091034, "eval_mse": 0.042920876294374466, "eval_r2": 0.24390189487252778, "eval_rmse": 0.20717354118824005, "eval_runtime": 63.3404, "eval_samples_per_second": 60.798, "eval_steps_per_second": 0.963, "learning_rate": 0.001, "step": 180 }, { "epoch": 2.0, "eval_explained_variance": 0.29684373140335085, "eval_loss": 0.3499707877635956, "eval_mae": 0.1344238668680191, "eval_mse": 0.040213558822870255, "eval_r2": 0.29433247839872884, "eval_rmse": 0.2005331814289093, "eval_runtime": 62.9751, "eval_samples_per_second": 61.151, "eval_steps_per_second": 0.969, "learning_rate": 0.001, "step": 360 }, { "epoch": 2.7777777777777777, "grad_norm": 0.24828869104385376, "learning_rate": 0.001, "loss": 0.4245, "step": 500 }, { "epoch": 3.0, "eval_explained_variance": 0.30147211949030556, "eval_loss": 0.348723828792572, "eval_mae": 0.13415758311748505, "eval_mse": 0.039820797741413116, "eval_r2": 0.2990147755741232, "eval_rmse": 0.19955149292945862, "eval_runtime": 62.9656, "eval_samples_per_second": 61.16, "eval_steps_per_second": 0.969, "learning_rate": 0.001, "step": 540 }, { "epoch": 4.0, "eval_explained_variance": 0.30280657211939493, "eval_loss": 0.34868186712265015, "eval_mae": 0.1321154236793518, "eval_mse": 0.039921365678310394, "eval_r2": 0.29809857426586617, "eval_rmse": 0.19980332255363464, "eval_runtime": 62.5917, "eval_samples_per_second": 61.526, "eval_steps_per_second": 0.975, "learning_rate": 0.001, "step": 720 }, { "epoch": 5.0, "eval_explained_variance": 0.30923322041829426, "eval_loss": 0.3478679955005646, "eval_mae": 0.13809293508529663, "eval_mse": 0.03967553377151489, "eval_r2": 0.3042232042211434, "eval_rmse": 0.19918718934059143, "eval_runtime": 63.1019, "eval_samples_per_second": 61.028, "eval_steps_per_second": 0.967, "learning_rate": 0.001, "step": 900 }, { "epoch": 5.555555555555555, "grad_norm": 0.1865607500076294, "learning_rate": 0.001, "loss": 0.3599, "step": 1000 }, { "epoch": 6.0, "eval_explained_variance": 0.3188656767209371, "eval_loss": 0.3459060490131378, "eval_mae": 0.1357322782278061, "eval_mse": 0.03895646333694458, "eval_r2": 0.315883105588613, "eval_rmse": 0.1973739117383957, "eval_runtime": 62.3971, "eval_samples_per_second": 61.718, "eval_steps_per_second": 0.978, "learning_rate": 0.001, "step": 1080 }, { "epoch": 7.0, "eval_explained_variance": 0.3159688035647074, "eval_loss": 0.3450120985507965, "eval_mae": 0.1327129602432251, "eval_mse": 0.038980767130851746, "eval_r2": 0.3128088622831601, "eval_rmse": 0.19743546843528748, "eval_runtime": 62.076, "eval_samples_per_second": 62.037, "eval_steps_per_second": 0.983, "learning_rate": 0.001, "step": 1260 }, { "epoch": 8.0, "eval_explained_variance": 0.32217884063720703, "eval_loss": 0.3441869616508484, "eval_mae": 0.1320338100194931, "eval_mse": 0.038429226726293564, "eval_r2": 0.32032579871679767, "eval_rmse": 0.19603374600410461, "eval_runtime": 63.9365, "eval_samples_per_second": 60.232, "eval_steps_per_second": 0.954, "learning_rate": 0.001, "step": 1440 }, { "epoch": 8.333333333333334, "grad_norm": 0.15372440218925476, "learning_rate": 0.001, "loss": 0.3558, "step": 1500 }, { "epoch": 9.0, "eval_explained_variance": 0.3277563373247782, "eval_loss": 0.342985063791275, "eval_mae": 0.1313859075307846, "eval_mse": 0.038142018020153046, "eval_r2": 0.3266377146752823, "eval_rmse": 0.19529981911182404, "eval_runtime": 64.7671, "eval_samples_per_second": 59.459, "eval_steps_per_second": 0.942, "learning_rate": 0.001, "step": 1620 }, { "epoch": 10.0, "eval_explained_variance": 0.3305574059486389, "eval_loss": 0.34308692812919617, "eval_mae": 0.13377133011817932, "eval_mse": 0.03810949996113777, "eval_r2": 0.32670753180687356, "eval_rmse": 0.19521655142307281, "eval_runtime": 62.4523, "eval_samples_per_second": 61.663, "eval_steps_per_second": 0.977, "learning_rate": 0.001, "step": 1800 }, { "epoch": 11.0, "eval_explained_variance": 0.3265243093172709, "eval_loss": 0.3441123068332672, "eval_mae": 0.13503584265708923, "eval_mse": 0.038502778857946396, "eval_r2": 0.3219845692298351, "eval_rmse": 0.1962212473154068, "eval_runtime": 62.6133, "eval_samples_per_second": 61.504, "eval_steps_per_second": 0.974, "learning_rate": 0.001, "step": 1980 }, { "epoch": 11.11111111111111, "grad_norm": 0.13863442838191986, "learning_rate": 0.001, "loss": 0.3537, "step": 2000 }, { "epoch": 12.0, "eval_explained_variance": 0.33473743200302125, "eval_loss": 0.3415883183479309, "eval_mae": 0.12936046719551086, "eval_mse": 0.037833068519830704, "eval_r2": 0.33150620992608343, "eval_rmse": 0.19450724124908447, "eval_runtime": 62.1996, "eval_samples_per_second": 61.914, "eval_steps_per_second": 0.981, "learning_rate": 0.001, "step": 2160 }, { "epoch": 13.0, "eval_explained_variance": 0.33251333634058633, "eval_loss": 0.3423134684562683, "eval_mae": 0.13204918801784515, "eval_mse": 0.038014866411685944, "eval_r2": 0.32960302381449236, "eval_rmse": 0.19497402012348175, "eval_runtime": 64.3868, "eval_samples_per_second": 59.81, "eval_steps_per_second": 0.947, "learning_rate": 0.001, "step": 2340 }, { "epoch": 13.88888888888889, "grad_norm": 0.14617109298706055, "learning_rate": 0.001, "loss": 0.3525, "step": 2500 }, { "epoch": 14.0, "eval_explained_variance": 0.32708331346511843, "eval_loss": 0.34299299120903015, "eval_mae": 0.13094526529312134, "eval_mse": 0.03828390687704086, "eval_r2": 0.3245196527423962, "eval_rmse": 0.1956627368927002, "eval_runtime": 66.8909, "eval_samples_per_second": 57.571, "eval_steps_per_second": 0.912, "learning_rate": 0.001, "step": 2520 }, { "epoch": 15.0, "eval_explained_variance": 0.3325016220410665, "eval_loss": 0.34356197714805603, "eval_mae": 0.13530610501766205, "eval_mse": 0.03825169429183006, "eval_r2": 0.3277798911211838, "eval_rmse": 0.1955804079771042, "eval_runtime": 65.6584, "eval_samples_per_second": 58.652, "eval_steps_per_second": 0.929, "learning_rate": 0.001, "step": 2700 }, { "epoch": 16.0, "eval_explained_variance": 0.33054962555567424, "eval_loss": 0.34228795766830444, "eval_mae": 0.1311980038881302, "eval_mse": 0.038003597408533096, "eval_r2": 0.3293309699222721, "eval_rmse": 0.19494511187076569, "eval_runtime": 66.054, "eval_samples_per_second": 58.301, "eval_steps_per_second": 0.923, "learning_rate": 0.001, "step": 2880 }, { "epoch": 16.666666666666668, "grad_norm": 0.10535696148872375, "learning_rate": 0.001, "loss": 0.3514, "step": 3000 }, { "epoch": 17.0, "eval_explained_variance": 0.33247481981913246, "eval_loss": 0.34247255325317383, "eval_mae": 0.130988210439682, "eval_mse": 0.03792767971754074, "eval_r2": 0.33058644552459077, "eval_rmse": 0.19475029408931732, "eval_runtime": 65.3798, "eval_samples_per_second": 58.902, "eval_steps_per_second": 0.933, "learning_rate": 0.001, "step": 3060 }, { "epoch": 18.0, "eval_explained_variance": 0.3258938948313395, "eval_loss": 0.34577205777168274, "eval_mae": 0.13508611917495728, "eval_mse": 0.039037104696035385, "eval_r2": 0.31874432018815235, "eval_rmse": 0.19757810235023499, "eval_runtime": 65.3435, "eval_samples_per_second": 58.935, "eval_steps_per_second": 0.934, "learning_rate": 0.001, "step": 3240 }, { "epoch": 19.0, "eval_explained_variance": 0.34337151447931924, "eval_loss": 0.3397020399570465, "eval_mae": 0.12969434261322021, "eval_mse": 0.037133779376745224, "eval_r2": 0.3425327065302982, "eval_rmse": 0.19270126521587372, "eval_runtime": 65.4239, "eval_samples_per_second": 58.862, "eval_steps_per_second": 0.932, "learning_rate": 0.0001, "step": 3420 }, { "epoch": 19.444444444444443, "grad_norm": 0.5233563184738159, "learning_rate": 0.0001, "loss": 0.3504, "step": 3500 }, { "epoch": 20.0, "eval_explained_variance": 0.3440438469250997, "eval_loss": 0.33953577280044556, "eval_mae": 0.1284000277519226, "eval_mse": 0.037087082862854004, "eval_r2": 0.34341303394538236, "eval_rmse": 0.19258007407188416, "eval_runtime": 70.7263, "eval_samples_per_second": 54.449, "eval_steps_per_second": 0.862, "learning_rate": 0.0001, "step": 3600 }, { "epoch": 21.0, "eval_explained_variance": 0.34820417960484823, "eval_loss": 0.33860594034194946, "eval_mae": 0.12879149615764618, "eval_mse": 0.03677341714501381, "eval_r2": 0.34776048986398916, "eval_rmse": 0.1917639672756195, "eval_runtime": 64.058, "eval_samples_per_second": 60.117, "eval_steps_per_second": 0.952, "learning_rate": 0.0001, "step": 3780 }, { "epoch": 22.0, "eval_explained_variance": 0.3486798485120138, "eval_loss": 0.3384495973587036, "eval_mae": 0.1275842934846878, "eval_mse": 0.03677380457520485, "eval_r2": 0.347598729712472, "eval_rmse": 0.1917649656534195, "eval_runtime": 63.8409, "eval_samples_per_second": 60.322, "eval_steps_per_second": 0.956, "learning_rate": 0.0001, "step": 3960 }, { "epoch": 22.22222222222222, "grad_norm": 0.09594211727380753, "learning_rate": 0.0001, "loss": 0.3458, "step": 4000 }, { "epoch": 23.0, "eval_explained_variance": 0.3494525194168091, "eval_loss": 0.33845821022987366, "eval_mae": 0.1285364329814911, "eval_mse": 0.03674604371190071, "eval_r2": 0.3479367857148791, "eval_rmse": 0.19169257581233978, "eval_runtime": 64.1141, "eval_samples_per_second": 60.065, "eval_steps_per_second": 0.951, "learning_rate": 0.0001, "step": 4140 }, { "epoch": 24.0, "eval_explained_variance": 0.3512475649515788, "eval_loss": 0.3379111588001251, "eval_mae": 0.12811453640460968, "eval_mse": 0.036613743752241135, "eval_r2": 0.3500040885646508, "eval_rmse": 0.1913471817970276, "eval_runtime": 64.2637, "eval_samples_per_second": 59.925, "eval_steps_per_second": 0.949, "learning_rate": 0.0001, "step": 4320 }, { "epoch": 25.0, "grad_norm": 0.11646634340286255, "learning_rate": 0.0001, "loss": 0.3444, "step": 4500 }, { "epoch": 25.0, "eval_explained_variance": 0.3538547078768412, "eval_loss": 0.33727332949638367, "eval_mae": 0.12702713906764984, "eval_mse": 0.03639335185289383, "eval_r2": 0.35317531845679523, "eval_rmse": 0.19077041745185852, "eval_runtime": 64.8763, "eval_samples_per_second": 59.359, "eval_steps_per_second": 0.94, "learning_rate": 0.0001, "step": 4500 }, { "epoch": 26.0, "eval_explained_variance": 0.35404903093973794, "eval_loss": 0.3374665081501007, "eval_mae": 0.12778176367282867, "eval_mse": 0.036440037190914154, "eval_r2": 0.35285777629596343, "eval_rmse": 0.19089274108409882, "eval_runtime": 66.0475, "eval_samples_per_second": 58.306, "eval_steps_per_second": 0.924, "learning_rate": 0.0001, "step": 4680 }, { "epoch": 27.0, "eval_explained_variance": 0.3557887593905131, "eval_loss": 0.3370736837387085, "eval_mae": 0.12684836983680725, "eval_mse": 0.0362778902053833, "eval_r2": 0.3550974370033309, "eval_rmse": 0.19046755135059357, "eval_runtime": 64.2759, "eval_samples_per_second": 59.914, "eval_steps_per_second": 0.949, "learning_rate": 0.0001, "step": 4860 }, { "epoch": 27.77777777777778, "grad_norm": 0.12078458070755005, "learning_rate": 0.0001, "loss": 0.3432, "step": 5000 }, { "epoch": 28.0, "eval_explained_variance": 0.35624225934346515, "eval_loss": 0.3374920189380646, "eval_mae": 0.12884411215782166, "eval_mse": 0.03637442737817764, "eval_r2": 0.3540297931137497, "eval_rmse": 0.1907208114862442, "eval_runtime": 62.9406, "eval_samples_per_second": 61.185, "eval_steps_per_second": 0.969, "learning_rate": 0.0001, "step": 5040 }, { "epoch": 29.0, "eval_explained_variance": 0.35836246808369954, "eval_loss": 0.3366614878177643, "eval_mae": 0.12721382081508636, "eval_mse": 0.03616877272725105, "eval_r2": 0.35692547282759735, "eval_rmse": 0.19018089771270752, "eval_runtime": 63.5617, "eval_samples_per_second": 60.587, "eval_steps_per_second": 0.96, "learning_rate": 0.0001, "step": 5220 }, { "epoch": 30.0, "eval_explained_variance": 0.35781089862187704, "eval_loss": 0.33686068654060364, "eval_mae": 0.12742702662944794, "eval_mse": 0.036219749599695206, "eval_r2": 0.35697372280957457, "eval_rmse": 0.1903148740530014, "eval_runtime": 64.5932, "eval_samples_per_second": 59.619, "eval_steps_per_second": 0.944, "learning_rate": 0.0001, "step": 5400 }, { "epoch": 30.555555555555557, "grad_norm": 0.12843400239944458, "learning_rate": 0.0001, "loss": 0.3436, "step": 5500 }, { "epoch": 31.0, "eval_explained_variance": 0.35743744373321534, "eval_loss": 0.33649107813835144, "eval_mae": 0.1261633038520813, "eval_mse": 0.03617921844124794, "eval_r2": 0.3570007265746181, "eval_rmse": 0.19020836055278778, "eval_runtime": 64.4068, "eval_samples_per_second": 59.792, "eval_steps_per_second": 0.947, "learning_rate": 0.0001, "step": 5580 }, { "epoch": 32.0, "eval_explained_variance": 0.3598915974299113, "eval_loss": 0.3362315893173218, "eval_mae": 0.12689867615699768, "eval_mse": 0.03607836738228798, "eval_r2": 0.3585556812333787, "eval_rmse": 0.18994306027889252, "eval_runtime": 64.3497, "eval_samples_per_second": 59.845, "eval_steps_per_second": 0.948, "learning_rate": 0.0001, "step": 5760 }, { "epoch": 33.0, "eval_explained_variance": 0.3600114901860555, "eval_loss": 0.3359103798866272, "eval_mae": 0.12590400874614716, "eval_mse": 0.03598121553659439, "eval_r2": 0.3594679732877264, "eval_rmse": 0.18968714773654938, "eval_runtime": 62.4426, "eval_samples_per_second": 61.673, "eval_steps_per_second": 0.977, "learning_rate": 0.0001, "step": 5940 }, { "epoch": 33.333333333333336, "grad_norm": 0.1340547353029251, "learning_rate": 0.0001, "loss": 0.3422, "step": 6000 }, { "epoch": 34.0, "eval_explained_variance": 0.3614112695058187, "eval_loss": 0.3358704149723053, "eval_mae": 0.12649337947368622, "eval_mse": 0.03595872223377228, "eval_r2": 0.36047318612101337, "eval_rmse": 0.18962785601615906, "eval_runtime": 63.7163, "eval_samples_per_second": 60.44, "eval_steps_per_second": 0.957, "learning_rate": 0.0001, "step": 6120 }, { "epoch": 35.0, "eval_explained_variance": 0.36142420768737793, "eval_loss": 0.3358561396598816, "eval_mae": 0.12690505385398865, "eval_mse": 0.035979654639959335, "eval_r2": 0.36028295338805283, "eval_rmse": 0.18968303501605988, "eval_runtime": 64.286, "eval_samples_per_second": 59.904, "eval_steps_per_second": 0.949, "learning_rate": 0.0001, "step": 6300 }, { "epoch": 36.0, "eval_explained_variance": 0.3616306185722351, "eval_loss": 0.3358567953109741, "eval_mae": 0.1270161122083664, "eval_mse": 0.03595282882452011, "eval_r2": 0.36070464542617636, "eval_rmse": 0.18961231410503387, "eval_runtime": 65.3529, "eval_samples_per_second": 58.926, "eval_steps_per_second": 0.933, "learning_rate": 0.0001, "step": 6480 }, { "epoch": 36.111111111111114, "grad_norm": 0.15736152231693268, "learning_rate": 0.0001, "loss": 0.3419, "step": 6500 }, { "epoch": 37.0, "eval_explained_variance": 0.3638279716173808, "eval_loss": 0.33518898487091064, "eval_mae": 0.12592247128486633, "eval_mse": 0.035799965262413025, "eval_r2": 0.3632553215862593, "eval_rmse": 0.18920879065990448, "eval_runtime": 61.7965, "eval_samples_per_second": 62.317, "eval_steps_per_second": 0.987, "learning_rate": 0.0001, "step": 6660 }, { "epoch": 38.0, "eval_explained_variance": 0.3633318622907003, "eval_loss": 0.3352443277835846, "eval_mae": 0.12485368549823761, "eval_mse": 0.03583494946360588, "eval_r2": 0.36236021325168755, "eval_rmse": 0.18930120766162872, "eval_runtime": 62.963, "eval_samples_per_second": 61.163, "eval_steps_per_second": 0.969, "learning_rate": 0.0001, "step": 6840 }, { "epoch": 38.888888888888886, "grad_norm": 0.15783248841762543, "learning_rate": 0.0001, "loss": 0.3411, "step": 7000 }, { "epoch": 39.0, "eval_explained_variance": 0.3634601871172587, "eval_loss": 0.3356732130050659, "eval_mae": 0.12721793353557587, "eval_mse": 0.035907089710235596, "eval_r2": 0.3618933511816494, "eval_rmse": 0.1894916594028473, "eval_runtime": 70.2586, "eval_samples_per_second": 54.812, "eval_steps_per_second": 0.868, "learning_rate": 0.0001, "step": 7020 }, { "epoch": 40.0, "eval_explained_variance": 0.3626919905344645, "eval_loss": 0.33541348576545715, "eval_mae": 0.12567447125911713, "eval_mse": 0.0358363538980484, "eval_r2": 0.36236915434279976, "eval_rmse": 0.189304918050766, "eval_runtime": 62.4927, "eval_samples_per_second": 61.623, "eval_steps_per_second": 0.976, "learning_rate": 0.0001, "step": 7200 }, { "epoch": 41.0, "eval_explained_variance": 0.3647373716036479, "eval_loss": 0.33518943190574646, "eval_mae": 0.12578776478767395, "eval_mse": 0.03580744192004204, "eval_r2": 0.36348323098837665, "eval_rmse": 0.18922854959964752, "eval_runtime": 62.5653, "eval_samples_per_second": 61.552, "eval_steps_per_second": 0.975, "learning_rate": 0.0001, "step": 7380 }, { "epoch": 41.666666666666664, "grad_norm": 0.13081993162631989, "learning_rate": 0.0001, "loss": 0.3405, "step": 7500 }, { "epoch": 42.0, "eval_explained_variance": 0.36353408892949424, "eval_loss": 0.33517739176750183, "eval_mae": 0.12543819844722748, "eval_mse": 0.0358295664191246, "eval_r2": 0.3624927930566926, "eval_rmse": 0.1892869919538498, "eval_runtime": 62.3201, "eval_samples_per_second": 61.794, "eval_steps_per_second": 0.979, "learning_rate": 0.0001, "step": 7560 }, { "epoch": 43.0, "eval_explained_variance": 0.3640146851539612, "eval_loss": 0.3352269232273102, "eval_mae": 0.12432164698839188, "eval_mse": 0.035806283354759216, "eval_r2": 0.36300371981038576, "eval_rmse": 0.1892254799604416, "eval_runtime": 62.6336, "eval_samples_per_second": 61.485, "eval_steps_per_second": 0.974, "learning_rate": 0.0001, "step": 7740 }, { "epoch": 44.0, "eval_explained_variance": 0.36538604497909544, "eval_loss": 0.3349473774433136, "eval_mae": 0.12483787536621094, "eval_mse": 0.035719502717256546, "eval_r2": 0.36435684013877145, "eval_rmse": 0.18899603188037872, "eval_runtime": 61.9264, "eval_samples_per_second": 62.187, "eval_steps_per_second": 0.985, "learning_rate": 1e-05, "step": 7920 }, { "epoch": 44.44444444444444, "grad_norm": 0.16597294807434082, "learning_rate": 1e-05, "loss": 0.3408, "step": 8000 }, { "epoch": 45.0, "eval_explained_variance": 0.36700586080551145, "eval_loss": 0.33477169275283813, "eval_mae": 0.12559731304645538, "eval_mse": 0.03562255576252937, "eval_r2": 0.3660886793741337, "eval_rmse": 0.18873938918113708, "eval_runtime": 62.322, "eval_samples_per_second": 61.792, "eval_steps_per_second": 0.979, "learning_rate": 1e-05, "step": 8100 }, { "epoch": 46.0, "eval_explained_variance": 0.3669166843096415, "eval_loss": 0.33469367027282715, "eval_mae": 0.12542863190174103, "eval_mse": 0.03559611365199089, "eval_r2": 0.3661504315012644, "eval_rmse": 0.1886693239212036, "eval_runtime": 64.1553, "eval_samples_per_second": 60.026, "eval_steps_per_second": 0.951, "learning_rate": 1e-05, "step": 8280 }, { "epoch": 47.0, "eval_explained_variance": 0.3675737778345744, "eval_loss": 0.33472740650177, "eval_mae": 0.12506955862045288, "eval_mse": 0.03560151532292366, "eval_r2": 0.3663928527459443, "eval_rmse": 0.1886836439371109, "eval_runtime": 62.1886, "eval_samples_per_second": 61.925, "eval_steps_per_second": 0.981, "learning_rate": 1e-05, "step": 8460 }, { "epoch": 47.22222222222222, "grad_norm": 0.18154595792293549, "learning_rate": 1e-05, "loss": 0.3389, "step": 8500 }, { "epoch": 48.0, "eval_explained_variance": 0.36750999291737874, "eval_loss": 0.33466145396232605, "eval_mae": 0.12558797001838684, "eval_mse": 0.03559773787856102, "eval_r2": 0.3663117022053123, "eval_rmse": 0.18867363035678864, "eval_runtime": 63.9966, "eval_samples_per_second": 60.175, "eval_steps_per_second": 0.953, "learning_rate": 1e-05, "step": 8640 }, { "epoch": 49.0, "eval_explained_variance": 0.3675870537757874, "eval_loss": 0.3346670866012573, "eval_mae": 0.12523934245109558, "eval_mse": 0.03558367118239403, "eval_r2": 0.3663468108507958, "eval_rmse": 0.18863634765148163, "eval_runtime": 62.5633, "eval_samples_per_second": 61.554, "eval_steps_per_second": 0.975, "learning_rate": 1e-05, "step": 8820 }, { "epoch": 50.0, "grad_norm": 0.15163227915763855, "learning_rate": 1e-05, "loss": 0.3392, "step": 9000 }, { "epoch": 50.0, "eval_explained_variance": 0.3682251930236816, "eval_loss": 0.3344159722328186, "eval_mae": 0.12486516684293747, "eval_mse": 0.03552938997745514, "eval_r2": 0.3673761105155409, "eval_rmse": 0.18849241733551025, "eval_runtime": 62.8019, "eval_samples_per_second": 61.32, "eval_steps_per_second": 0.971, "learning_rate": 1e-05, "step": 9000 }, { "epoch": 51.0, "eval_explained_variance": 0.3685545603434245, "eval_loss": 0.3344169855117798, "eval_mae": 0.1250401884317398, "eval_mse": 0.03551666438579559, "eval_r2": 0.3675034686133137, "eval_rmse": 0.188458651304245, "eval_runtime": 63.251, "eval_samples_per_second": 60.884, "eval_steps_per_second": 0.964, "learning_rate": 1e-05, "step": 9180 }, { "epoch": 52.0, "eval_explained_variance": 0.36793676217397053, "eval_loss": 0.33431872725486755, "eval_mae": 0.12446907162666321, "eval_mse": 0.03551502525806427, "eval_r2": 0.367407841227485, "eval_rmse": 0.1884543001651764, "eval_runtime": 63.6497, "eval_samples_per_second": 60.503, "eval_steps_per_second": 0.958, "learning_rate": 1e-05, "step": 9360 }, { "epoch": 52.77777777777778, "grad_norm": 0.15340301394462585, "learning_rate": 1e-05, "loss": 0.3383, "step": 9500 }, { "epoch": 53.0, "eval_explained_variance": 0.36925456921259564, "eval_loss": 0.3343693017959595, "eval_mae": 0.12511175870895386, "eval_mse": 0.03549981862306595, "eval_r2": 0.36788043363052797, "eval_rmse": 0.18841396272182465, "eval_runtime": 62.6861, "eval_samples_per_second": 61.433, "eval_steps_per_second": 0.973, "learning_rate": 1e-05, "step": 9540 }, { "epoch": 54.0, "eval_explained_variance": 0.36938191652297975, "eval_loss": 0.33439403772354126, "eval_mae": 0.12479714304208755, "eval_mse": 0.03547336533665657, "eval_r2": 0.3682909151854633, "eval_rmse": 0.18834374845027924, "eval_runtime": 65.5859, "eval_samples_per_second": 58.717, "eval_steps_per_second": 0.93, "learning_rate": 1e-05, "step": 9720 }, { "epoch": 55.0, "eval_explained_variance": 0.3684127966562907, "eval_loss": 0.3343343138694763, "eval_mae": 0.12404093891382217, "eval_mse": 0.035542283207178116, "eval_r2": 0.36729971341845385, "eval_rmse": 0.18852661550045013, "eval_runtime": 63.3299, "eval_samples_per_second": 60.809, "eval_steps_per_second": 0.963, "learning_rate": 1e-05, "step": 9900 }, { "epoch": 55.55555555555556, "grad_norm": 0.17277947068214417, "learning_rate": 1e-05, "loss": 0.3389, "step": 10000 }, { "epoch": 56.0, "eval_explained_variance": 0.36942450602849325, "eval_loss": 0.3342228829860687, "eval_mae": 0.12461840361356735, "eval_mse": 0.03544970974326134, "eval_r2": 0.36858695945684256, "eval_rmse": 0.18828094005584717, "eval_runtime": 62.4539, "eval_samples_per_second": 61.662, "eval_steps_per_second": 0.977, "learning_rate": 1e-05, "step": 10080 }, { "epoch": 57.0, "eval_explained_variance": 0.36928979953130087, "eval_loss": 0.3342856764793396, "eval_mae": 0.12474922835826874, "eval_mse": 0.035442013293504715, "eval_r2": 0.3687052486171652, "eval_rmse": 0.1882604956626892, "eval_runtime": 64.3533, "eval_samples_per_second": 59.842, "eval_steps_per_second": 0.948, "learning_rate": 1e-05, "step": 10260 }, { "epoch": 58.0, "eval_explained_variance": 0.3687208811442057, "eval_loss": 0.33420124650001526, "eval_mae": 0.12463465332984924, "eval_mse": 0.035497527569532394, "eval_r2": 0.3676875263144609, "eval_rmse": 0.18840786814689636, "eval_runtime": 64.6001, "eval_samples_per_second": 59.613, "eval_steps_per_second": 0.944, "learning_rate": 1e-05, "step": 10440 }, { "epoch": 58.333333333333336, "grad_norm": 0.12022228538990021, "learning_rate": 1e-05, "loss": 0.3374, "step": 10500 }, { "epoch": 59.0, "eval_explained_variance": 0.37048781315485635, "eval_loss": 0.33398088812828064, "eval_mae": 0.12423835694789886, "eval_mse": 0.03539055958390236, "eval_r2": 0.36958822652787654, "eval_rmse": 0.18812379240989685, "eval_runtime": 64.0827, "eval_samples_per_second": 60.094, "eval_steps_per_second": 0.952, "learning_rate": 1e-05, "step": 10620 }, { "epoch": 60.0, "eval_explained_variance": 0.37067395051320395, "eval_loss": 0.3339443802833557, "eval_mae": 0.12487544864416122, "eval_mse": 0.035368870943784714, "eval_r2": 0.3699269522159971, "eval_rmse": 0.18806613981723785, "eval_runtime": 63.8143, "eval_samples_per_second": 60.347, "eval_steps_per_second": 0.956, "learning_rate": 1e-05, "step": 10800 }, { "epoch": 61.0, "eval_explained_variance": 0.3699048717816671, "eval_loss": 0.33412715792655945, "eval_mae": 0.12488223612308502, "eval_mse": 0.035459477454423904, "eval_r2": 0.3686492933307029, "eval_rmse": 0.18830686807632446, "eval_runtime": 63.8278, "eval_samples_per_second": 60.334, "eval_steps_per_second": 0.956, "learning_rate": 1e-05, "step": 10980 }, { "epoch": 61.111111111111114, "grad_norm": 0.16086408495903015, "learning_rate": 1e-05, "loss": 0.3385, "step": 11000 }, { "epoch": 62.0, "eval_explained_variance": 0.37065960963567096, "eval_loss": 0.3342025578022003, "eval_mae": 0.12435286492109299, "eval_mse": 0.03547635301947594, "eval_r2": 0.36863099894792306, "eval_rmse": 0.18835167586803436, "eval_runtime": 64.6869, "eval_samples_per_second": 59.533, "eval_steps_per_second": 0.943, "learning_rate": 1e-05, "step": 11160 }, { "epoch": 63.0, "eval_explained_variance": 0.3693171819051107, "eval_loss": 0.3341706395149231, "eval_mae": 0.12502440810203552, "eval_mse": 0.03547227010130882, "eval_r2": 0.3684136605464969, "eval_rmse": 0.18834082782268524, "eval_runtime": 63.8334, "eval_samples_per_second": 60.329, "eval_steps_per_second": 0.956, "learning_rate": 1e-05, "step": 11340 }, { "epoch": 63.888888888888886, "grad_norm": 0.16820169985294342, "learning_rate": 1e-05, "loss": 0.3378, "step": 11500 }, { "epoch": 64.0, "eval_explained_variance": 0.37020493745803834, "eval_loss": 0.33427801728248596, "eval_mae": 0.12529656291007996, "eval_mse": 0.03546770289540291, "eval_r2": 0.3687035885402607, "eval_rmse": 0.18832871317863464, "eval_runtime": 66.458, "eval_samples_per_second": 57.946, "eval_steps_per_second": 0.918, "learning_rate": 1e-05, "step": 11520 }, { "epoch": 65.0, "eval_explained_variance": 0.3690663933753967, "eval_loss": 0.3341085910797119, "eval_mae": 0.12434254586696625, "eval_mse": 0.03546525537967682, "eval_r2": 0.3683970608182981, "eval_rmse": 0.18832221627235413, "eval_runtime": 63.2385, "eval_samples_per_second": 60.896, "eval_steps_per_second": 0.965, "learning_rate": 1e-05, "step": 11700 }, { "epoch": 66.0, "eval_explained_variance": 0.3708378434181213, "eval_loss": 0.33402684330940247, "eval_mae": 0.12424939125776291, "eval_mse": 0.035455718636512756, "eval_r2": 0.3690868578773909, "eval_rmse": 0.18829688429832458, "eval_runtime": 65.0993, "eval_samples_per_second": 59.156, "eval_steps_per_second": 0.937, "learning_rate": 1e-05, "step": 11880 }, { "epoch": 66.66666666666667, "grad_norm": 0.17261184751987457, "learning_rate": 1.0000000000000002e-06, "loss": 0.3379, "step": 12000 }, { "epoch": 67.0, "eval_explained_variance": 0.3706888159116109, "eval_loss": 0.3337731659412384, "eval_mae": 0.124094158411026, "eval_mse": 0.03534681722521782, "eval_r2": 0.37028392085368445, "eval_rmse": 0.18800748884677887, "eval_runtime": 63.949, "eval_samples_per_second": 60.22, "eval_steps_per_second": 0.954, "learning_rate": 1.0000000000000002e-06, "step": 12060 }, { "epoch": 68.0, "eval_explained_variance": 0.3704707185427348, "eval_loss": 0.33422085642814636, "eval_mae": 0.12483730167150497, "eval_mse": 0.0354764387011528, "eval_r2": 0.3687023490234722, "eval_rmse": 0.18835189938545227, "eval_runtime": 63.5508, "eval_samples_per_second": 60.597, "eval_steps_per_second": 0.96, "learning_rate": 1.0000000000000002e-06, "step": 12240 }, { "epoch": 69.0, "eval_explained_variance": 0.371040137608846, "eval_loss": 0.3342364430427551, "eval_mae": 0.12574079632759094, "eval_mse": 0.035477764904499054, "eval_r2": 0.36882738308793905, "eval_rmse": 0.18835541605949402, "eval_runtime": 62.1829, "eval_samples_per_second": 61.93, "eval_steps_per_second": 0.981, "learning_rate": 1.0000000000000002e-06, "step": 12420 }, { "epoch": 69.44444444444444, "grad_norm": 0.1958952099084854, "learning_rate": 1.0000000000000002e-06, "loss": 0.3372, "step": 12500 }, { "epoch": 70.0, "eval_explained_variance": 0.3711942513783773, "eval_loss": 0.3338371813297272, "eval_mae": 0.1244586780667305, "eval_mse": 0.035380732268095016, "eval_r2": 0.3700339977150058, "eval_rmse": 0.18809767067432404, "eval_runtime": 62.7037, "eval_samples_per_second": 61.416, "eval_steps_per_second": 0.973, "learning_rate": 1.0000000000000002e-06, "step": 12600 }, { "epoch": 71.0, "eval_explained_variance": 0.3705748677253723, "eval_loss": 0.33424997329711914, "eval_mae": 0.12513956427574158, "eval_mse": 0.03546679764986038, "eval_r2": 0.3686927376254726, "eval_rmse": 0.18832629919052124, "eval_runtime": 62.7164, "eval_samples_per_second": 61.403, "eval_steps_per_second": 0.973, "learning_rate": 1.0000000000000002e-06, "step": 12780 }, { "epoch": 72.0, "eval_explained_variance": 0.3703965306282043, "eval_loss": 0.33384770154953003, "eval_mae": 0.12352322041988373, "eval_mse": 0.03537768870592117, "eval_r2": 0.36968915587109963, "eval_rmse": 0.18808957934379578, "eval_runtime": 61.8275, "eval_samples_per_second": 62.286, "eval_steps_per_second": 0.987, "learning_rate": 1.0000000000000002e-06, "step": 12960 }, { "epoch": 72.22222222222223, "grad_norm": 0.15659202635288239, "learning_rate": 1.0000000000000002e-06, "loss": 0.3379, "step": 13000 }, { "epoch": 73.0, "eval_explained_variance": 0.37064104874928794, "eval_loss": 0.33400118350982666, "eval_mae": 0.1247105821967125, "eval_mse": 0.035410381853580475, "eval_r2": 0.36933392249966623, "eval_rmse": 0.1881764680147171, "eval_runtime": 62.0483, "eval_samples_per_second": 62.065, "eval_steps_per_second": 0.983, "learning_rate": 1.0000000000000002e-06, "step": 13140 }, { "epoch": 74.0, "eval_explained_variance": 0.3708500941594442, "eval_loss": 0.3339782953262329, "eval_mae": 0.12470810115337372, "eval_mse": 0.03539663925766945, "eval_r2": 0.36976878944216374, "eval_rmse": 0.18813994526863098, "eval_runtime": 61.7539, "eval_samples_per_second": 62.36, "eval_steps_per_second": 0.988, "learning_rate": 1.0000000000000002e-07, "step": 13320 }, { "epoch": 75.0, "grad_norm": 0.13758498430252075, "learning_rate": 1.0000000000000002e-07, "loss": 0.3373, "step": 13500 }, { "epoch": 75.0, "eval_explained_variance": 0.37014045715332033, "eval_loss": 0.3339597284793854, "eval_mae": 0.12498915195465088, "eval_mse": 0.03541086986660957, "eval_r2": 0.36933255961612305, "eval_rmse": 0.18817776441574097, "eval_runtime": 62.1997, "eval_samples_per_second": 61.914, "eval_steps_per_second": 0.981, "learning_rate": 1.0000000000000002e-07, "step": 13500 }, { "epoch": 76.0, "eval_explained_variance": 0.3704050302505493, "eval_loss": 0.33381277322769165, "eval_mae": 0.12373475730419159, "eval_mse": 0.03538239002227783, "eval_r2": 0.36965286193507674, "eval_rmse": 0.18810206651687622, "eval_runtime": 62.1744, "eval_samples_per_second": 61.939, "eval_steps_per_second": 0.981, "learning_rate": 1.0000000000000002e-07, "step": 13680 }, { "epoch": 77.0, "eval_explained_variance": 0.37035290797551473, "eval_loss": 0.3340202569961548, "eval_mae": 0.12418477982282639, "eval_mse": 0.0354425273835659, "eval_r2": 0.3689132936263572, "eval_rmse": 0.18826185166835785, "eval_runtime": 63.4682, "eval_samples_per_second": 60.676, "eval_steps_per_second": 0.961, "learning_rate": 1.0000000000000002e-07, "step": 13860 }, { "epoch": 77.0, "learning_rate": 1.0000000000000002e-07, "step": 13860, "total_flos": 1.9987249746747064e+19, "train_loss": 0.3461118524724787, "train_runtime": 22281.3386, "train_samples_per_second": 51.702, "train_steps_per_second": 0.808 } ], "logging_steps": 500, "max_steps": 18000, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.9987249746747064e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }