|
{ |
|
"best_metric": 0.20436730980873108, |
|
"best_model_checkpoint": "./ryan03302024/checkpoint-2700", |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 8319, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.9509561657905579, |
|
"learning_rate": 9.969948311095084e-05, |
|
"loss": 0.5096, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5529997944831848, |
|
"learning_rate": 9.939896622190168e-05, |
|
"loss": 0.3519, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.6374471187591553, |
|
"learning_rate": 9.909844933285252e-05, |
|
"loss": 0.3431, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.235526204109192, |
|
"learning_rate": 9.879793244380335e-05, |
|
"loss": 0.3617, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 0.3133379817008972, |
|
"eval_na_accuracy": 0.876288652420044, |
|
"eval_ordinal_accuracy": 0.41741588711738586, |
|
"eval_ordinal_mae": 0.8465587496757507, |
|
"eval_runtime": 338.0185, |
|
"eval_samples_per_second": 13.239, |
|
"eval_steps_per_second": 1.657, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.0370614528656006, |
|
"learning_rate": 9.849741555475418e-05, |
|
"loss": 0.3721, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.619168996810913, |
|
"learning_rate": 9.819689866570502e-05, |
|
"loss": 0.3498, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.6790041923522949, |
|
"learning_rate": 9.789638177665586e-05, |
|
"loss": 0.3633, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.47009700536727905, |
|
"learning_rate": 9.759586488760669e-05, |
|
"loss": 0.2891, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 0.3022407293319702, |
|
"eval_na_accuracy": 0.699312686920166, |
|
"eval_ordinal_accuracy": 0.5044952630996704, |
|
"eval_ordinal_mae": 0.7738743424415588, |
|
"eval_runtime": 195.8973, |
|
"eval_samples_per_second": 22.844, |
|
"eval_steps_per_second": 2.859, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.6545381546020508, |
|
"learning_rate": 9.73073686741195e-05, |
|
"loss": 0.3277, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.2690508961677551, |
|
"learning_rate": 9.700685178507033e-05, |
|
"loss": 0.3505, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.571977138519287, |
|
"learning_rate": 9.670633489602116e-05, |
|
"loss": 0.2819, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.7339119911193848, |
|
"learning_rate": 9.6405818006972e-05, |
|
"loss": 0.3163, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 0.26153865456581116, |
|
"eval_na_accuracy": 0.7903780341148376, |
|
"eval_ordinal_accuracy": 0.5597226023674011, |
|
"eval_ordinal_mae": 0.6166509985923767, |
|
"eval_runtime": 200.5643, |
|
"eval_samples_per_second": 22.312, |
|
"eval_steps_per_second": 2.792, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.86951744556427, |
|
"learning_rate": 9.610530111792283e-05, |
|
"loss": 0.2707, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.3769865036010742, |
|
"learning_rate": 9.580478422887367e-05, |
|
"loss": 0.2917, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.39000704884529114, |
|
"learning_rate": 9.55042673398245e-05, |
|
"loss": 0.2765, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.7430490255355835, |
|
"learning_rate": 9.520375045077534e-05, |
|
"loss": 0.2781, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 0.2597866952419281, |
|
"eval_na_accuracy": 0.8298969268798828, |
|
"eval_ordinal_accuracy": 0.5936295986175537, |
|
"eval_ordinal_mae": 0.5432500839233398, |
|
"eval_runtime": 199.7937, |
|
"eval_samples_per_second": 22.398, |
|
"eval_steps_per_second": 2.803, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.8847980499267578, |
|
"learning_rate": 9.490323356172616e-05, |
|
"loss": 0.2643, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.0712331533432007, |
|
"learning_rate": 9.461473734823898e-05, |
|
"loss": 0.2747, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.9433643221855164, |
|
"learning_rate": 9.431422045918981e-05, |
|
"loss": 0.295, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.2171131372451782, |
|
"learning_rate": 9.401370357014065e-05, |
|
"loss": 0.2731, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 0.26127612590789795, |
|
"eval_na_accuracy": 0.8453608155250549, |
|
"eval_ordinal_accuracy": 0.5566401481628418, |
|
"eval_ordinal_mae": 0.5651282668113708, |
|
"eval_runtime": 199.6257, |
|
"eval_samples_per_second": 22.417, |
|
"eval_steps_per_second": 2.805, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.1272056102752686, |
|
"learning_rate": 9.371318668109149e-05, |
|
"loss": 0.2878, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.4902811348438263, |
|
"learning_rate": 9.341266979204232e-05, |
|
"loss": 0.2426, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.5631658434867859, |
|
"learning_rate": 9.311215290299315e-05, |
|
"loss": 0.2487, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.6327888369560242, |
|
"learning_rate": 9.281163601394399e-05, |
|
"loss": 0.2926, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 0.2734401226043701, |
|
"eval_na_accuracy": 0.9089347124099731, |
|
"eval_ordinal_accuracy": 0.5494477152824402, |
|
"eval_ordinal_mae": 0.5305272340774536, |
|
"eval_runtime": 196.452, |
|
"eval_samples_per_second": 22.779, |
|
"eval_steps_per_second": 2.851, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.6612924337387085, |
|
"learning_rate": 9.251111912489481e-05, |
|
"loss": 0.2871, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.513719081878662, |
|
"learning_rate": 9.221060223584565e-05, |
|
"loss": 0.2538, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.1069471836090088, |
|
"learning_rate": 9.191008534679649e-05, |
|
"loss": 0.21, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.458164691925049, |
|
"learning_rate": 9.160956845774733e-05, |
|
"loss": 0.2686, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 0.23620270192623138, |
|
"eval_na_accuracy": 0.7886598110198975, |
|
"eval_ordinal_accuracy": 0.6249678730964661, |
|
"eval_ordinal_mae": 0.485275000333786, |
|
"eval_runtime": 190.0791, |
|
"eval_samples_per_second": 23.543, |
|
"eval_steps_per_second": 2.946, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.5598509907722473, |
|
"learning_rate": 9.130905156869817e-05, |
|
"loss": 0.2525, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.2617449760437012, |
|
"learning_rate": 9.1008534679649e-05, |
|
"loss": 0.2453, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.5153591632843018, |
|
"learning_rate": 9.070801779059983e-05, |
|
"loss": 0.225, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.562745451927185, |
|
"learning_rate": 9.040750090155067e-05, |
|
"loss": 0.2715, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 0.24541285634040833, |
|
"eval_na_accuracy": 0.7714776396751404, |
|
"eval_ordinal_accuracy": 0.6254816055297852, |
|
"eval_ordinal_mae": 0.49135151505470276, |
|
"eval_runtime": 193.3675, |
|
"eval_samples_per_second": 23.142, |
|
"eval_steps_per_second": 2.896, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.5192123651504517, |
|
"learning_rate": 9.010698401250151e-05, |
|
"loss": 0.2424, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.8706183433532715, |
|
"learning_rate": 8.980646712345235e-05, |
|
"loss": 0.2341, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.0854986906051636, |
|
"learning_rate": 8.950595023440318e-05, |
|
"loss": 0.2694, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.208933115005493, |
|
"learning_rate": 8.920543334535402e-05, |
|
"loss": 0.2459, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 0.2451752871274948, |
|
"eval_na_accuracy": 0.7800687551498413, |
|
"eval_ordinal_accuracy": 0.6072437763214111, |
|
"eval_ordinal_mae": 0.47625866532325745, |
|
"eval_runtime": 192.5483, |
|
"eval_samples_per_second": 23.241, |
|
"eval_steps_per_second": 2.908, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.8433920741081238, |
|
"learning_rate": 8.890491645630485e-05, |
|
"loss": 0.2375, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.9415366649627686, |
|
"learning_rate": 8.860439956725569e-05, |
|
"loss": 0.2918, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.6653732061386108, |
|
"learning_rate": 8.830388267820651e-05, |
|
"loss": 0.2615, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.47592782974243164, |
|
"learning_rate": 8.800336578915735e-05, |
|
"loss": 0.2033, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 0.23654678463935852, |
|
"eval_na_accuracy": 0.7663230299949646, |
|
"eval_ordinal_accuracy": 0.6105831265449524, |
|
"eval_ordinal_mae": 0.4967080354690552, |
|
"eval_runtime": 192.911, |
|
"eval_samples_per_second": 23.197, |
|
"eval_steps_per_second": 2.903, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.8534409403800964, |
|
"learning_rate": 8.770284890010819e-05, |
|
"loss": 0.2082, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.6388253569602966, |
|
"learning_rate": 8.740233201105903e-05, |
|
"loss": 0.2238, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.5685051679611206, |
|
"learning_rate": 8.710181512200987e-05, |
|
"loss": 0.2775, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.9940025806427002, |
|
"learning_rate": 8.680129823296069e-05, |
|
"loss": 0.2234, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 0.22986453771591187, |
|
"eval_na_accuracy": 0.8676975965499878, |
|
"eval_ordinal_accuracy": 0.6180323362350464, |
|
"eval_ordinal_mae": 0.49474066495895386, |
|
"eval_runtime": 191.9691, |
|
"eval_samples_per_second": 23.311, |
|
"eval_steps_per_second": 2.917, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.6706956624984741, |
|
"learning_rate": 8.650078134391153e-05, |
|
"loss": 0.2027, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.4717120826244354, |
|
"learning_rate": 8.620026445486237e-05, |
|
"loss": 0.2072, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.5387229919433594, |
|
"learning_rate": 8.58997475658132e-05, |
|
"loss": 0.2328, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.0794016122817993, |
|
"learning_rate": 8.559923067676405e-05, |
|
"loss": 0.2035, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 0.2313634753227234, |
|
"eval_na_accuracy": 0.7835051417350769, |
|
"eval_ordinal_accuracy": 0.6308759450912476, |
|
"eval_ordinal_mae": 0.4743907153606415, |
|
"eval_runtime": 191.4648, |
|
"eval_samples_per_second": 23.372, |
|
"eval_steps_per_second": 2.925, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.2199459075927734, |
|
"learning_rate": 8.529871378771488e-05, |
|
"loss": 0.2769, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.5869888067245483, |
|
"learning_rate": 8.499819689866571e-05, |
|
"loss": 0.2194, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.1209378242492676, |
|
"learning_rate": 8.469768000961655e-05, |
|
"loss": 0.2536, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.4376941919326782, |
|
"learning_rate": 8.439716312056739e-05, |
|
"loss": 0.2277, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 0.23891955614089966, |
|
"eval_na_accuracy": 0.730240523815155, |
|
"eval_ordinal_accuracy": 0.643462598323822, |
|
"eval_ordinal_mae": 0.46490678191185, |
|
"eval_runtime": 193.3761, |
|
"eval_samples_per_second": 23.141, |
|
"eval_steps_per_second": 2.896, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.8051934242248535, |
|
"learning_rate": 8.409664623151821e-05, |
|
"loss": 0.2007, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.5033048391342163, |
|
"learning_rate": 8.379612934246905e-05, |
|
"loss": 0.1993, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.577111542224884, |
|
"learning_rate": 8.349561245341989e-05, |
|
"loss": 0.2337, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.8847935795783997, |
|
"learning_rate": 8.319509556437071e-05, |
|
"loss": 0.2535, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 0.22593823075294495, |
|
"eval_na_accuracy": 0.8247422575950623, |
|
"eval_ordinal_accuracy": 0.6021063327789307, |
|
"eval_ordinal_mae": 0.45086583495140076, |
|
"eval_runtime": 192.5485, |
|
"eval_samples_per_second": 23.241, |
|
"eval_steps_per_second": 2.908, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.888088345527649, |
|
"learning_rate": 8.289457867532155e-05, |
|
"loss": 0.2162, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.9532464146614075, |
|
"learning_rate": 8.259406178627239e-05, |
|
"loss": 0.2632, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.7865819931030273, |
|
"learning_rate": 8.229354489722323e-05, |
|
"loss": 0.2005, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.0594526529312134, |
|
"learning_rate": 8.199302800817407e-05, |
|
"loss": 0.2209, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 0.23685050010681152, |
|
"eval_na_accuracy": 0.7577319741249084, |
|
"eval_ordinal_accuracy": 0.6362702250480652, |
|
"eval_ordinal_mae": 0.45073509216308594, |
|
"eval_runtime": 192.1913, |
|
"eval_samples_per_second": 23.284, |
|
"eval_steps_per_second": 2.914, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.9557788372039795, |
|
"learning_rate": 8.16925111191249e-05, |
|
"loss": 0.2009, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.7707318067550659, |
|
"learning_rate": 8.139199423007573e-05, |
|
"loss": 0.2172, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.569989562034607, |
|
"learning_rate": 8.109147734102657e-05, |
|
"loss": 0.2229, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.5089187622070312, |
|
"learning_rate": 8.079096045197741e-05, |
|
"loss": 0.2007, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 0.21611952781677246, |
|
"eval_na_accuracy": 0.831615149974823, |
|
"eval_ordinal_accuracy": 0.6539943218231201, |
|
"eval_ordinal_mae": 0.427180677652359, |
|
"eval_runtime": 190.3474, |
|
"eval_samples_per_second": 23.51, |
|
"eval_steps_per_second": 2.942, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.9716092348098755, |
|
"learning_rate": 8.049044356292825e-05, |
|
"loss": 0.2375, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.04927659034729, |
|
"learning_rate": 8.018992667387908e-05, |
|
"loss": 0.2548, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.1613515615463257, |
|
"learning_rate": 7.988940978482991e-05, |
|
"loss": 0.3238, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.4977881908416748, |
|
"learning_rate": 7.958889289578075e-05, |
|
"loss": 0.2013, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 0.24333100020885468, |
|
"eval_na_accuracy": 0.7731958627700806, |
|
"eval_ordinal_accuracy": 0.6128949522972107, |
|
"eval_ordinal_mae": 0.43257907032966614, |
|
"eval_runtime": 193.154, |
|
"eval_samples_per_second": 23.168, |
|
"eval_steps_per_second": 2.899, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.6059219837188721, |
|
"learning_rate": 7.928837600673157e-05, |
|
"loss": 0.273, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.3245080709457397, |
|
"learning_rate": 7.898785911768241e-05, |
|
"loss": 0.2148, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.5151876211166382, |
|
"learning_rate": 7.868734222863325e-05, |
|
"loss": 0.1901, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.37926343083381653, |
|
"learning_rate": 7.838682533958409e-05, |
|
"loss": 0.1999, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 0.22266778349876404, |
|
"eval_na_accuracy": 0.8247422575950623, |
|
"eval_ordinal_accuracy": 0.6552786827087402, |
|
"eval_ordinal_mae": 0.4460422396659851, |
|
"eval_runtime": 193.6404, |
|
"eval_samples_per_second": 23.11, |
|
"eval_steps_per_second": 2.892, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 4.746518611907959, |
|
"learning_rate": 7.808630845053493e-05, |
|
"loss": 0.2169, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.3521455228328705, |
|
"learning_rate": 7.778579156148575e-05, |
|
"loss": 0.228, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.3322985172271729, |
|
"learning_rate": 7.748527467243659e-05, |
|
"loss": 0.2047, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.6390167474746704, |
|
"learning_rate": 7.718475778338743e-05, |
|
"loss": 0.2157, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 0.2134324014186859, |
|
"eval_na_accuracy": 0.8161512017250061, |
|
"eval_ordinal_accuracy": 0.6362702250480652, |
|
"eval_ordinal_mae": 0.4728001058101654, |
|
"eval_runtime": 191.9038, |
|
"eval_samples_per_second": 23.319, |
|
"eval_steps_per_second": 2.918, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 2.581801176071167, |
|
"learning_rate": 7.688424089433827e-05, |
|
"loss": 0.2335, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.3418828248977661, |
|
"learning_rate": 7.65837240052891e-05, |
|
"loss": 0.2446, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.605707585811615, |
|
"learning_rate": 7.628320711623994e-05, |
|
"loss": 0.2144, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.6443113088607788, |
|
"learning_rate": 7.598269022719077e-05, |
|
"loss": 0.2154, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 0.2238776683807373, |
|
"eval_na_accuracy": 0.8573883175849915, |
|
"eval_ordinal_accuracy": 0.5787310600280762, |
|
"eval_ordinal_mae": 0.4734483063220978, |
|
"eval_runtime": 192.57, |
|
"eval_samples_per_second": 23.238, |
|
"eval_steps_per_second": 2.908, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.4812798500061035, |
|
"learning_rate": 7.568217333814161e-05, |
|
"loss": 0.2172, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.5400125980377197, |
|
"learning_rate": 7.538165644909245e-05, |
|
"loss": 0.1907, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.6313005685806274, |
|
"learning_rate": 7.508113956004327e-05, |
|
"loss": 0.2078, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.3446621894836426, |
|
"learning_rate": 7.478062267099411e-05, |
|
"loss": 0.2169, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 0.23939213156700134, |
|
"eval_na_accuracy": 0.8848797082901001, |
|
"eval_ordinal_accuracy": 0.6254816055297852, |
|
"eval_ordinal_mae": 0.4392252564430237, |
|
"eval_runtime": 194.2506, |
|
"eval_samples_per_second": 23.037, |
|
"eval_steps_per_second": 2.883, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.5709229111671448, |
|
"learning_rate": 7.448010578194495e-05, |
|
"loss": 0.2155, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.9002838134765625, |
|
"learning_rate": 7.417958889289577e-05, |
|
"loss": 0.2534, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.3746689558029175, |
|
"learning_rate": 7.387907200384661e-05, |
|
"loss": 0.2489, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.1106956005096436, |
|
"learning_rate": 7.357855511479745e-05, |
|
"loss": 0.2719, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 0.2283402532339096, |
|
"eval_na_accuracy": 0.8780068755149841, |
|
"eval_ordinal_accuracy": 0.6229129433631897, |
|
"eval_ordinal_mae": 0.4323791265487671, |
|
"eval_runtime": 193.3183, |
|
"eval_samples_per_second": 23.148, |
|
"eval_steps_per_second": 2.897, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.5689876079559326, |
|
"learning_rate": 7.327803822574829e-05, |
|
"loss": 0.2054, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 2.14363694190979, |
|
"learning_rate": 7.297752133669913e-05, |
|
"loss": 0.213, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.4198272228240967, |
|
"learning_rate": 7.267700444764997e-05, |
|
"loss": 0.1729, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.5297597646713257, |
|
"learning_rate": 7.237648755860079e-05, |
|
"loss": 0.2244, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 0.2139568328857422, |
|
"eval_na_accuracy": 0.8728522062301636, |
|
"eval_ordinal_accuracy": 0.6313896775245667, |
|
"eval_ordinal_mae": 0.4482755959033966, |
|
"eval_runtime": 193.8188, |
|
"eval_samples_per_second": 23.089, |
|
"eval_steps_per_second": 2.889, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.4800972044467926, |
|
"learning_rate": 7.207597066955163e-05, |
|
"loss": 0.231, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 2.125603199005127, |
|
"learning_rate": 7.177545378050247e-05, |
|
"loss": 0.2371, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.553396224975586, |
|
"learning_rate": 7.147493689145331e-05, |
|
"loss": 0.217, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.5980515480041504, |
|
"learning_rate": 7.117442000240415e-05, |
|
"loss": 0.2072, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 0.2198052555322647, |
|
"eval_na_accuracy": 0.8213058710098267, |
|
"eval_ordinal_accuracy": 0.6439763903617859, |
|
"eval_ordinal_mae": 0.4330386817455292, |
|
"eval_runtime": 193.8423, |
|
"eval_samples_per_second": 23.086, |
|
"eval_steps_per_second": 2.889, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.0013697147369385, |
|
"learning_rate": 7.087390311335498e-05, |
|
"loss": 0.2141, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 2.5208072662353516, |
|
"learning_rate": 7.057338622430581e-05, |
|
"loss": 0.2109, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.3612011969089508, |
|
"learning_rate": 7.027286933525665e-05, |
|
"loss": 0.1701, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.6919033527374268, |
|
"learning_rate": 6.997235244620747e-05, |
|
"loss": 0.1754, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 0.20990537106990814, |
|
"eval_na_accuracy": 0.8419243693351746, |
|
"eval_ordinal_accuracy": 0.6712047457695007, |
|
"eval_ordinal_mae": 0.41983720660209656, |
|
"eval_runtime": 193.4285, |
|
"eval_samples_per_second": 23.135, |
|
"eval_steps_per_second": 2.895, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 2.494234323501587, |
|
"learning_rate": 6.967183555715831e-05, |
|
"loss": 0.2547, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.2194478511810303, |
|
"learning_rate": 6.937131866810915e-05, |
|
"loss": 0.2105, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.8242117166519165, |
|
"learning_rate": 6.907080177905999e-05, |
|
"loss": 0.185, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 2.185267210006714, |
|
"learning_rate": 6.877028489001081e-05, |
|
"loss": 0.1773, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 0.20526131987571716, |
|
"eval_na_accuracy": 0.8642611503601074, |
|
"eval_ordinal_accuracy": 0.6586180329322815, |
|
"eval_ordinal_mae": 0.4104856550693512, |
|
"eval_runtime": 191.4771, |
|
"eval_samples_per_second": 23.371, |
|
"eval_steps_per_second": 2.925, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.5727041959762573, |
|
"learning_rate": 6.846976800096165e-05, |
|
"loss": 0.1994, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.3567266464233398, |
|
"learning_rate": 6.816925111191249e-05, |
|
"loss": 0.2087, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.43862801790237427, |
|
"learning_rate": 6.786873422286333e-05, |
|
"loss": 0.2394, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.7140023708343506, |
|
"learning_rate": 6.756821733381417e-05, |
|
"loss": 0.2378, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 0.20436730980873108, |
|
"eval_na_accuracy": 0.8333333134651184, |
|
"eval_ordinal_accuracy": 0.6647829413414001, |
|
"eval_ordinal_mae": 0.4324062764644623, |
|
"eval_runtime": 193.6819, |
|
"eval_samples_per_second": 23.105, |
|
"eval_steps_per_second": 2.891, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.44048529863357544, |
|
"learning_rate": 6.7267700444765e-05, |
|
"loss": 0.172, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 2.4433631896972656, |
|
"learning_rate": 6.696718355571583e-05, |
|
"loss": 0.1666, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.6148251891136169, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.2, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.5114859342575073, |
|
"learning_rate": 6.636614977761751e-05, |
|
"loss": 0.1295, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 0.20437294244766235, |
|
"eval_na_accuracy": 0.8247422575950623, |
|
"eval_ordinal_accuracy": 0.6843051910400391, |
|
"eval_ordinal_mae": 0.4015503525733948, |
|
"eval_runtime": 191.7797, |
|
"eval_samples_per_second": 23.334, |
|
"eval_steps_per_second": 2.92, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.5778698921203613, |
|
"learning_rate": 6.606563288856835e-05, |
|
"loss": 0.1506, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.3983612358570099, |
|
"learning_rate": 6.576511599951917e-05, |
|
"loss": 0.1358, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.8497461080551147, |
|
"learning_rate": 6.546459911047001e-05, |
|
"loss": 0.1288, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 1.2482887506484985, |
|
"learning_rate": 6.516408222142085e-05, |
|
"loss": 0.1126, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 0.2301819771528244, |
|
"eval_na_accuracy": 0.7577319741249084, |
|
"eval_ordinal_accuracy": 0.6804521083831787, |
|
"eval_ordinal_mae": 0.402468740940094, |
|
"eval_runtime": 195.9356, |
|
"eval_samples_per_second": 22.839, |
|
"eval_steps_per_second": 2.858, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 12.926443099975586, |
|
"learning_rate": 6.486356533237167e-05, |
|
"loss": 0.1244, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.5321182012557983, |
|
"learning_rate": 6.456304844332251e-05, |
|
"loss": 0.1368, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.4846901297569275, |
|
"learning_rate": 6.426253155427335e-05, |
|
"loss": 0.1837, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.6585441827774048, |
|
"learning_rate": 6.396201466522419e-05, |
|
"loss": 0.1262, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 0.22050338983535767, |
|
"eval_na_accuracy": 0.8092783689498901, |
|
"eval_ordinal_accuracy": 0.6516824960708618, |
|
"eval_ordinal_mae": 0.4016842246055603, |
|
"eval_runtime": 191.162, |
|
"eval_samples_per_second": 23.409, |
|
"eval_steps_per_second": 2.929, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.30136457085609436, |
|
"learning_rate": 6.366149777617503e-05, |
|
"loss": 0.1417, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.017369508743286, |
|
"learning_rate": 6.336098088712587e-05, |
|
"loss": 0.1155, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.6489114165306091, |
|
"learning_rate": 6.306046399807669e-05, |
|
"loss": 0.1074, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.5059970617294312, |
|
"learning_rate": 6.275994710902753e-05, |
|
"loss": 0.1104, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 0.21169866621494293, |
|
"eval_na_accuracy": 0.8453608155250549, |
|
"eval_ordinal_accuracy": 0.6778833866119385, |
|
"eval_ordinal_mae": 0.39305010437965393, |
|
"eval_runtime": 189.5391, |
|
"eval_samples_per_second": 23.61, |
|
"eval_steps_per_second": 2.955, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 1.127930998802185, |
|
"learning_rate": 6.245943021997837e-05, |
|
"loss": 0.1353, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 1.9614742994308472, |
|
"learning_rate": 6.215891333092921e-05, |
|
"loss": 0.1069, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.5779272317886353, |
|
"learning_rate": 6.185839644188005e-05, |
|
"loss": 0.1161, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.6223546266555786, |
|
"learning_rate": 6.155787955283087e-05, |
|
"loss": 0.1657, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 0.21738822758197784, |
|
"eval_na_accuracy": 0.8591065406799316, |
|
"eval_ordinal_accuracy": 0.6665810346603394, |
|
"eval_ordinal_mae": 0.38904985785484314, |
|
"eval_runtime": 193.8286, |
|
"eval_samples_per_second": 23.087, |
|
"eval_steps_per_second": 2.889, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 1.1987367868423462, |
|
"learning_rate": 6.125736266378171e-05, |
|
"loss": 0.1139, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.510863184928894, |
|
"learning_rate": 6.095684577473254e-05, |
|
"loss": 0.1286, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.7094324827194214, |
|
"learning_rate": 6.065632888568338e-05, |
|
"loss": 0.1231, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 2.9405245780944824, |
|
"learning_rate": 6.035581199663422e-05, |
|
"loss": 0.1186, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 0.22988717257976532, |
|
"eval_na_accuracy": 0.8058419227600098, |
|
"eval_ordinal_accuracy": 0.6622142195701599, |
|
"eval_ordinal_mae": 0.40129175782203674, |
|
"eval_runtime": 191.4373, |
|
"eval_samples_per_second": 23.376, |
|
"eval_steps_per_second": 2.925, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 1.9970686435699463, |
|
"learning_rate": 6.005529510758505e-05, |
|
"loss": 0.1492, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.4853156805038452, |
|
"learning_rate": 5.975477821853589e-05, |
|
"loss": 0.0923, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 3.004967451095581, |
|
"learning_rate": 5.9454261329486714e-05, |
|
"loss": 0.1395, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 2.5411417484283447, |
|
"learning_rate": 5.915374444043755e-05, |
|
"loss": 0.1304, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 0.21757462620735168, |
|
"eval_na_accuracy": 0.8109965920448303, |
|
"eval_ordinal_accuracy": 0.6902132034301758, |
|
"eval_ordinal_mae": 0.3801174759864807, |
|
"eval_runtime": 191.9938, |
|
"eval_samples_per_second": 23.308, |
|
"eval_steps_per_second": 2.917, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 2.031360626220703, |
|
"learning_rate": 5.885322755138839e-05, |
|
"loss": 0.161, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.9166708588600159, |
|
"learning_rate": 5.855271066233923e-05, |
|
"loss": 0.1062, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 2.4028913974761963, |
|
"learning_rate": 5.825219377329007e-05, |
|
"loss": 0.1085, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.99100261926651, |
|
"learning_rate": 5.79516768842409e-05, |
|
"loss": 0.1081, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 0.23295216262340546, |
|
"eval_na_accuracy": 0.831615149974823, |
|
"eval_ordinal_accuracy": 0.664269208908081, |
|
"eval_ordinal_mae": 0.3867188096046448, |
|
"eval_runtime": 194.3732, |
|
"eval_samples_per_second": 23.023, |
|
"eval_steps_per_second": 2.881, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.6745060682296753, |
|
"learning_rate": 5.765115999519173e-05, |
|
"loss": 0.1079, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.5718048810958862, |
|
"learning_rate": 5.735064310614256e-05, |
|
"loss": 0.1353, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.4868764579296112, |
|
"learning_rate": 5.70501262170934e-05, |
|
"loss": 0.1872, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 1.2596338987350464, |
|
"learning_rate": 5.674960932804424e-05, |
|
"loss": 0.1281, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 0.23198525607585907, |
|
"eval_na_accuracy": 0.7680412530899048, |
|
"eval_ordinal_accuracy": 0.6902132034301758, |
|
"eval_ordinal_mae": 0.3953803777694702, |
|
"eval_runtime": 193.0669, |
|
"eval_samples_per_second": 23.178, |
|
"eval_steps_per_second": 2.901, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.5509974360466003, |
|
"learning_rate": 5.644909243899508e-05, |
|
"loss": 0.1085, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 1.8018646240234375, |
|
"learning_rate": 5.614857554994592e-05, |
|
"loss": 0.1134, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.46525055170059204, |
|
"learning_rate": 5.584805866089674e-05, |
|
"loss": 0.1789, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.4697776734828949, |
|
"learning_rate": 5.554754177184758e-05, |
|
"loss": 0.1192, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 0.231234610080719, |
|
"eval_na_accuracy": 0.7989690899848938, |
|
"eval_ordinal_accuracy": 0.6768559217453003, |
|
"eval_ordinal_mae": 0.4108929932117462, |
|
"eval_runtime": 192.9099, |
|
"eval_samples_per_second": 23.197, |
|
"eval_steps_per_second": 2.903, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 1.9536067247390747, |
|
"learning_rate": 5.524702488279841e-05, |
|
"loss": 0.1531, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.6636119484901428, |
|
"learning_rate": 5.494650799374925e-05, |
|
"loss": 0.1152, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 3.2403175830841064, |
|
"learning_rate": 5.464599110470009e-05, |
|
"loss": 0.133, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.7575627565383911, |
|
"learning_rate": 5.434547421565093e-05, |
|
"loss": 0.1029, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 0.21953611075878143, |
|
"eval_na_accuracy": 0.8024054765701294, |
|
"eval_ordinal_accuracy": 0.681993305683136, |
|
"eval_ordinal_mae": 0.38697296380996704, |
|
"eval_runtime": 192.4936, |
|
"eval_samples_per_second": 23.248, |
|
"eval_steps_per_second": 2.909, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.6479007601737976, |
|
"learning_rate": 5.404495732660175e-05, |
|
"loss": 0.1174, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 2.8630564212799072, |
|
"learning_rate": 5.374444043755259e-05, |
|
"loss": 0.1398, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.6482840180397034, |
|
"learning_rate": 5.344392354850343e-05, |
|
"loss": 0.1144, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.39722609519958496, |
|
"learning_rate": 5.314340665945426e-05, |
|
"loss": 0.1159, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 0.22003380954265594, |
|
"eval_na_accuracy": 0.7903780341148376, |
|
"eval_ordinal_accuracy": 0.6812227368354797, |
|
"eval_ordinal_mae": 0.3859783113002777, |
|
"eval_runtime": 193.9688, |
|
"eval_samples_per_second": 23.071, |
|
"eval_steps_per_second": 2.887, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.7128223776817322, |
|
"learning_rate": 5.28428897704051e-05, |
|
"loss": 0.1437, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 2.3041298389434814, |
|
"learning_rate": 5.254237288135594e-05, |
|
"loss": 0.1413, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.9336991310119629, |
|
"learning_rate": 5.2241855992306764e-05, |
|
"loss": 0.1112, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 3.5301501750946045, |
|
"learning_rate": 5.19413391032576e-05, |
|
"loss": 0.1159, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 0.21585588157176971, |
|
"eval_na_accuracy": 0.7989690899848938, |
|
"eval_ordinal_accuracy": 0.6981762051582336, |
|
"eval_ordinal_mae": 0.3712264895439148, |
|
"eval_runtime": 191.7494, |
|
"eval_samples_per_second": 23.338, |
|
"eval_steps_per_second": 2.92, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.8215810060501099, |
|
"learning_rate": 5.164082221420844e-05, |
|
"loss": 0.0948, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.8609411716461182, |
|
"learning_rate": 5.134030532515928e-05, |
|
"loss": 0.0936, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.7595614790916443, |
|
"learning_rate": 5.103978843611011e-05, |
|
"loss": 0.1181, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.7951629757881165, |
|
"learning_rate": 5.073927154706095e-05, |
|
"loss": 0.107, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 0.22618594765663147, |
|
"eval_na_accuracy": 0.8213058710098267, |
|
"eval_ordinal_accuracy": 0.6904700994491577, |
|
"eval_ordinal_mae": 0.37572237849235535, |
|
"eval_runtime": 193.8218, |
|
"eval_samples_per_second": 23.088, |
|
"eval_steps_per_second": 2.889, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 1.809657096862793, |
|
"learning_rate": 5.0438754658011775e-05, |
|
"loss": 0.1275, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.6124809384346008, |
|
"learning_rate": 5.0138237768962613e-05, |
|
"loss": 0.0975, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.5735268592834473, |
|
"learning_rate": 4.983772087991345e-05, |
|
"loss": 0.1267, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.2053624987602234, |
|
"learning_rate": 4.953720399086429e-05, |
|
"loss": 0.1262, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 0.22913989424705505, |
|
"eval_na_accuracy": 0.8247422575950623, |
|
"eval_ordinal_accuracy": 0.683534562587738, |
|
"eval_ordinal_mae": 0.3841015100479126, |
|
"eval_runtime": 192.0713, |
|
"eval_samples_per_second": 23.299, |
|
"eval_steps_per_second": 2.916, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 1.163071632385254, |
|
"learning_rate": 4.923668710181513e-05, |
|
"loss": 0.112, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.48347094655036926, |
|
"learning_rate": 4.893617021276596e-05, |
|
"loss": 0.1312, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 2.0154635906219482, |
|
"learning_rate": 4.863565332371679e-05, |
|
"loss": 0.1057, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 2.7999942302703857, |
|
"learning_rate": 4.833513643466763e-05, |
|
"loss": 0.1437, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 0.23114901781082153, |
|
"eval_na_accuracy": 0.800687313079834, |
|
"eval_ordinal_accuracy": 0.6922681927680969, |
|
"eval_ordinal_mae": 0.3750612139701843, |
|
"eval_runtime": 193.0261, |
|
"eval_samples_per_second": 23.183, |
|
"eval_steps_per_second": 2.901, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.7880312204360962, |
|
"learning_rate": 4.803461954561846e-05, |
|
"loss": 0.1099, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.5259920954704285, |
|
"learning_rate": 4.77341026565693e-05, |
|
"loss": 0.1355, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.49388599395751953, |
|
"learning_rate": 4.743358576752014e-05, |
|
"loss": 0.1331, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.7215922474861145, |
|
"learning_rate": 4.714508955403294e-05, |
|
"loss": 0.0916, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 0.23432399332523346, |
|
"eval_na_accuracy": 0.8659793734550476, |
|
"eval_ordinal_accuracy": 0.6791677474975586, |
|
"eval_ordinal_mae": 0.37433725595474243, |
|
"eval_runtime": 191.7082, |
|
"eval_samples_per_second": 23.343, |
|
"eval_steps_per_second": 2.921, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.4619504511356354, |
|
"learning_rate": 4.684457266498378e-05, |
|
"loss": 0.11, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 2.470409393310547, |
|
"learning_rate": 4.654405577593461e-05, |
|
"loss": 0.138, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.46536511182785034, |
|
"learning_rate": 4.6243538886885443e-05, |
|
"loss": 0.1233, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.4539566934108734, |
|
"learning_rate": 4.594302199783628e-05, |
|
"loss": 0.1266, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 0.22511447966098785, |
|
"eval_na_accuracy": 0.8505154848098755, |
|
"eval_ordinal_accuracy": 0.6861032843589783, |
|
"eval_ordinal_mae": 0.37239742279052734, |
|
"eval_runtime": 185.8881, |
|
"eval_samples_per_second": 24.074, |
|
"eval_steps_per_second": 3.013, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.497732013463974, |
|
"learning_rate": 4.5642505108787114e-05, |
|
"loss": 0.109, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 2.833667278289795, |
|
"learning_rate": 4.534198821973795e-05, |
|
"loss": 0.1342, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.6105310320854187, |
|
"learning_rate": 4.504147133068879e-05, |
|
"loss": 0.125, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.5035513639450073, |
|
"learning_rate": 4.474095444163962e-05, |
|
"loss": 0.1185, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 0.22424614429473877, |
|
"eval_na_accuracy": 0.8264604806900024, |
|
"eval_ordinal_accuracy": 0.6902132034301758, |
|
"eval_ordinal_mae": 0.36660563945770264, |
|
"eval_runtime": 188.1827, |
|
"eval_samples_per_second": 23.78, |
|
"eval_steps_per_second": 2.976, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.6165774464607239, |
|
"learning_rate": 4.444043755259046e-05, |
|
"loss": 0.1273, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 2.2787024974823, |
|
"learning_rate": 4.413992066354129e-05, |
|
"loss": 0.1537, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 1.2412303686141968, |
|
"learning_rate": 4.3839403774492125e-05, |
|
"loss": 0.1197, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 2.671398162841797, |
|
"learning_rate": 4.353888688544296e-05, |
|
"loss": 0.1037, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 0.22189220786094666, |
|
"eval_na_accuracy": 0.8522336483001709, |
|
"eval_ordinal_accuracy": 0.6845620274543762, |
|
"eval_ordinal_mae": 0.3699798583984375, |
|
"eval_runtime": 185.1462, |
|
"eval_samples_per_second": 24.17, |
|
"eval_steps_per_second": 3.025, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.7951823472976685, |
|
"learning_rate": 4.32383699963938e-05, |
|
"loss": 0.1335, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.5750038027763367, |
|
"learning_rate": 4.2937853107344634e-05, |
|
"loss": 0.1225, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.599660336971283, |
|
"learning_rate": 4.263733621829547e-05, |
|
"loss": 0.1153, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.5971084237098694, |
|
"learning_rate": 4.233681932924631e-05, |
|
"loss": 0.1264, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 0.22111621499061584, |
|
"eval_na_accuracy": 0.8350515365600586, |
|
"eval_ordinal_accuracy": 0.6891857385635376, |
|
"eval_ordinal_mae": 0.36766940355300903, |
|
"eval_runtime": 187.791, |
|
"eval_samples_per_second": 23.83, |
|
"eval_steps_per_second": 2.982, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.6335272789001465, |
|
"learning_rate": 4.203630244019714e-05, |
|
"loss": 0.1309, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.4906469583511353, |
|
"learning_rate": 4.1735785551147974e-05, |
|
"loss": 0.1481, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.5834776163101196, |
|
"learning_rate": 4.143526866209881e-05, |
|
"loss": 0.1082, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.3094954788684845, |
|
"learning_rate": 4.1134751773049644e-05, |
|
"loss": 0.1404, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 0.22064529359340668, |
|
"eval_na_accuracy": 0.7938144207000732, |
|
"eval_ordinal_accuracy": 0.6945800185203552, |
|
"eval_ordinal_mae": 0.371782511472702, |
|
"eval_runtime": 187.3419, |
|
"eval_samples_per_second": 23.887, |
|
"eval_steps_per_second": 2.989, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.43263766169548035, |
|
"learning_rate": 4.083423488400048e-05, |
|
"loss": 0.0901, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 1.0660347938537598, |
|
"learning_rate": 4.053371799495132e-05, |
|
"loss": 0.1197, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.7330833077430725, |
|
"learning_rate": 4.023320110590215e-05, |
|
"loss": 0.1052, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 2.152076005935669, |
|
"learning_rate": 3.993268421685299e-05, |
|
"loss": 0.1238, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 0.20976723730564117, |
|
"eval_na_accuracy": 0.8264604806900024, |
|
"eval_ordinal_accuracy": 0.6948369145393372, |
|
"eval_ordinal_mae": 0.37225744128227234, |
|
"eval_runtime": 186.1293, |
|
"eval_samples_per_second": 24.042, |
|
"eval_steps_per_second": 3.009, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.28938254714012146, |
|
"learning_rate": 3.9632167327803824e-05, |
|
"loss": 0.1089, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 0.5542977452278137, |
|
"learning_rate": 3.9331650438754655e-05, |
|
"loss": 0.1034, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 3.0670604705810547, |
|
"learning_rate": 3.9031133549705494e-05, |
|
"loss": 0.1207, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 1.1476399898529053, |
|
"learning_rate": 3.873061666065633e-05, |
|
"loss": 0.0868, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 0.2088630199432373, |
|
"eval_na_accuracy": 0.8144329786300659, |
|
"eval_ordinal_accuracy": 0.7025430202484131, |
|
"eval_ordinal_mae": 0.3573513329029083, |
|
"eval_runtime": 186.6283, |
|
"eval_samples_per_second": 23.978, |
|
"eval_steps_per_second": 3.001, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 1.155088186264038, |
|
"learning_rate": 3.8430099771607164e-05, |
|
"loss": 0.1285, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 5.079723358154297, |
|
"learning_rate": 3.8129582882558e-05, |
|
"loss": 0.1503, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 1.3054969310760498, |
|
"learning_rate": 3.782906599350884e-05, |
|
"loss": 0.1117, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.676068902015686, |
|
"learning_rate": 3.752854910445967e-05, |
|
"loss": 0.0828, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_loss": 0.2203822135925293, |
|
"eval_na_accuracy": 0.7817869186401367, |
|
"eval_ordinal_accuracy": 0.7030567526817322, |
|
"eval_ordinal_mae": 0.3679908215999603, |
|
"eval_runtime": 186.9501, |
|
"eval_samples_per_second": 23.937, |
|
"eval_steps_per_second": 2.995, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.5627509355545044, |
|
"learning_rate": 3.7228032215410505e-05, |
|
"loss": 0.1013, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.74530029296875, |
|
"learning_rate": 3.6927515326361343e-05, |
|
"loss": 0.1163, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.4432946741580963, |
|
"learning_rate": 3.6626998437312175e-05, |
|
"loss": 0.1076, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.8598417639732361, |
|
"learning_rate": 3.6326481548263014e-05, |
|
"loss": 0.0986, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 0.21255970001220703, |
|
"eval_na_accuracy": 0.8127147555351257, |
|
"eval_ordinal_accuracy": 0.6981762051582336, |
|
"eval_ordinal_mae": 0.35426145792007446, |
|
"eval_runtime": 188.2634, |
|
"eval_samples_per_second": 23.77, |
|
"eval_steps_per_second": 2.975, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.5436437726020813, |
|
"learning_rate": 3.602596465921385e-05, |
|
"loss": 0.1368, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.3918437063694, |
|
"learning_rate": 3.5725447770164684e-05, |
|
"loss": 0.1062, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 3.4165215492248535, |
|
"learning_rate": 3.542493088111552e-05, |
|
"loss": 0.1394, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.5507489442825317, |
|
"learning_rate": 3.5124413992066354e-05, |
|
"loss": 0.0869, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 0.2247212827205658, |
|
"eval_na_accuracy": 0.80756014585495, |
|
"eval_ordinal_accuracy": 0.7107629179954529, |
|
"eval_ordinal_mae": 0.35320159792900085, |
|
"eval_runtime": 186.4594, |
|
"eval_samples_per_second": 24.0, |
|
"eval_steps_per_second": 3.003, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.36401164531707764, |
|
"learning_rate": 3.4823897103017186e-05, |
|
"loss": 0.1216, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.5114885568618774, |
|
"learning_rate": 3.4523380213968025e-05, |
|
"loss": 0.1034, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.5193465352058411, |
|
"learning_rate": 3.422286332491886e-05, |
|
"loss": 0.095, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.3319372236728668, |
|
"learning_rate": 3.3922346435869695e-05, |
|
"loss": 0.1006, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 0.22681304812431335, |
|
"eval_na_accuracy": 0.8161512017250061, |
|
"eval_ordinal_accuracy": 0.702799916267395, |
|
"eval_ordinal_mae": 0.3637341260910034, |
|
"eval_runtime": 187.993, |
|
"eval_samples_per_second": 23.804, |
|
"eval_steps_per_second": 2.979, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.6313589215278625, |
|
"learning_rate": 3.3621829546820533e-05, |
|
"loss": 0.1265, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.3126126825809479, |
|
"learning_rate": 3.332131265777137e-05, |
|
"loss": 0.1002, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.7703510522842407, |
|
"learning_rate": 3.3020795768722204e-05, |
|
"loss": 0.065, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.5371888875961304, |
|
"learning_rate": 3.2720278879673036e-05, |
|
"loss": 0.0639, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 0.22521378099918365, |
|
"eval_na_accuracy": 0.8109965920448303, |
|
"eval_ordinal_accuracy": 0.7069098353385925, |
|
"eval_ordinal_mae": 0.3478536903858185, |
|
"eval_runtime": 186.1883, |
|
"eval_samples_per_second": 24.035, |
|
"eval_steps_per_second": 3.008, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.3779759109020233, |
|
"learning_rate": 3.2419761990623874e-05, |
|
"loss": 0.0491, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.3881787657737732, |
|
"learning_rate": 3.2119245101574706e-05, |
|
"loss": 0.0475, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.2288794368505478, |
|
"learning_rate": 3.1818728212525544e-05, |
|
"loss": 0.062, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.4537404477596283, |
|
"learning_rate": 3.151821132347638e-05, |
|
"loss": 0.0569, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_loss": 0.23154324293136597, |
|
"eval_na_accuracy": 0.80756014585495, |
|
"eval_ordinal_accuracy": 0.7166709303855896, |
|
"eval_ordinal_mae": 0.3399028480052948, |
|
"eval_runtime": 188.0319, |
|
"eval_samples_per_second": 23.799, |
|
"eval_steps_per_second": 2.978, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.4112221300601959, |
|
"learning_rate": 3.1217694434427215e-05, |
|
"loss": 0.0513, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.4015844166278839, |
|
"learning_rate": 3.091717754537805e-05, |
|
"loss": 0.0608, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.43563222885131836, |
|
"learning_rate": 3.0616660656328885e-05, |
|
"loss": 0.0463, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 2.328296422958374, |
|
"learning_rate": 3.031614376727972e-05, |
|
"loss": 0.0626, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 0.2304299920797348, |
|
"eval_na_accuracy": 0.8127147555351257, |
|
"eval_ordinal_accuracy": 0.702799916267395, |
|
"eval_ordinal_mae": 0.34806403517723083, |
|
"eval_runtime": 186.3734, |
|
"eval_samples_per_second": 24.011, |
|
"eval_steps_per_second": 3.005, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.5192309021949768, |
|
"learning_rate": 3.001562687823056e-05, |
|
"loss": 0.0506, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.7943512201309204, |
|
"learning_rate": 2.9715109989181394e-05, |
|
"loss": 0.0612, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.5543213486671448, |
|
"learning_rate": 2.9414593100132226e-05, |
|
"loss": 0.0571, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.32637158036231995, |
|
"learning_rate": 2.9114076211083064e-05, |
|
"loss": 0.0502, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 0.23814311623573303, |
|
"eval_na_accuracy": 0.8092783689498901, |
|
"eval_ordinal_accuracy": 0.6953506469726562, |
|
"eval_ordinal_mae": 0.3623509407043457, |
|
"eval_runtime": 185.1929, |
|
"eval_samples_per_second": 24.164, |
|
"eval_steps_per_second": 3.024, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.4265578091144562, |
|
"learning_rate": 2.88135593220339e-05, |
|
"loss": 0.0529, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.5854742527008057, |
|
"learning_rate": 2.8513042432984738e-05, |
|
"loss": 0.0458, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.6325811147689819, |
|
"learning_rate": 2.821252554393557e-05, |
|
"loss": 0.0422, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.3043919503688812, |
|
"learning_rate": 2.7912008654886408e-05, |
|
"loss": 0.0541, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 0.2298140674829483, |
|
"eval_na_accuracy": 0.8109965920448303, |
|
"eval_ordinal_accuracy": 0.7159003615379333, |
|
"eval_ordinal_mae": 0.34048062562942505, |
|
"eval_runtime": 188.0938, |
|
"eval_samples_per_second": 23.791, |
|
"eval_steps_per_second": 2.977, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.5921896696090698, |
|
"learning_rate": 2.7611491765837243e-05, |
|
"loss": 0.0441, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 0.5739880204200745, |
|
"learning_rate": 2.7310974876788075e-05, |
|
"loss": 0.0399, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.6747081279754639, |
|
"learning_rate": 2.7010457987738914e-05, |
|
"loss": 0.0467, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.5710461139678955, |
|
"learning_rate": 2.670994109868975e-05, |
|
"loss": 0.0671, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 0.24321676790714264, |
|
"eval_na_accuracy": 0.7989690899848938, |
|
"eval_ordinal_accuracy": 0.7030567526817322, |
|
"eval_ordinal_mae": 0.3529086709022522, |
|
"eval_runtime": 188.2916, |
|
"eval_samples_per_second": 23.766, |
|
"eval_steps_per_second": 2.974, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.3675624430179596, |
|
"learning_rate": 2.640942420964058e-05, |
|
"loss": 0.0466, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 0.31548529863357544, |
|
"learning_rate": 2.610890732059142e-05, |
|
"loss": 0.0764, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.43977534770965576, |
|
"learning_rate": 2.5808390431542258e-05, |
|
"loss": 0.0475, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.6533438563346863, |
|
"learning_rate": 2.550787354249309e-05, |
|
"loss": 0.0672, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 0.2430579513311386, |
|
"eval_na_accuracy": 0.7714776396751404, |
|
"eval_ordinal_accuracy": 0.7194965481758118, |
|
"eval_ordinal_mae": 0.3360508680343628, |
|
"eval_runtime": 186.7478, |
|
"eval_samples_per_second": 23.963, |
|
"eval_steps_per_second": 2.999, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.2441914677619934, |
|
"learning_rate": 2.5207356653443925e-05, |
|
"loss": 0.0503, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.3184777796268463, |
|
"learning_rate": 2.490683976439476e-05, |
|
"loss": 0.0641, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 0.5295473337173462, |
|
"learning_rate": 2.4606322875345595e-05, |
|
"loss": 0.0453, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 0.6061444282531738, |
|
"learning_rate": 2.430580598629643e-05, |
|
"loss": 0.0446, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 0.24466152489185333, |
|
"eval_na_accuracy": 0.7938144207000732, |
|
"eval_ordinal_accuracy": 0.7141022086143494, |
|
"eval_ordinal_mae": 0.34008586406707764, |
|
"eval_runtime": 184.0098, |
|
"eval_samples_per_second": 24.319, |
|
"eval_steps_per_second": 3.043, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.7771974802017212, |
|
"learning_rate": 2.4005289097247265e-05, |
|
"loss": 0.0506, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 0.48779726028442383, |
|
"learning_rate": 2.37047722081981e-05, |
|
"loss": 0.0625, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 0.5689972639083862, |
|
"learning_rate": 2.340425531914894e-05, |
|
"loss": 0.0504, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 0.35768938064575195, |
|
"learning_rate": 2.310373843009977e-05, |
|
"loss": 0.0424, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_loss": 0.24263423681259155, |
|
"eval_na_accuracy": 0.8161512017250061, |
|
"eval_ordinal_accuracy": 0.7017723917961121, |
|
"eval_ordinal_mae": 0.34848281741142273, |
|
"eval_runtime": 184.8006, |
|
"eval_samples_per_second": 24.215, |
|
"eval_steps_per_second": 3.03, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.28792133927345276, |
|
"learning_rate": 2.280322154105061e-05, |
|
"loss": 0.0444, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 0.5131263136863708, |
|
"learning_rate": 2.2502704652001444e-05, |
|
"loss": 0.0494, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.4703335464000702, |
|
"learning_rate": 2.220218776295228e-05, |
|
"loss": 0.0568, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.48926159739494324, |
|
"learning_rate": 2.1901670873903115e-05, |
|
"loss": 0.0386, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_loss": 0.24884438514709473, |
|
"eval_na_accuracy": 0.8127147555351257, |
|
"eval_ordinal_accuracy": 0.7123041152954102, |
|
"eval_ordinal_mae": 0.33866390585899353, |
|
"eval_runtime": 182.7756, |
|
"eval_samples_per_second": 24.484, |
|
"eval_steps_per_second": 3.064, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.7936327457427979, |
|
"learning_rate": 2.160115398485395e-05, |
|
"loss": 0.0553, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 0.41186073422431946, |
|
"learning_rate": 2.1300637095804785e-05, |
|
"loss": 0.0441, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.7716944813728333, |
|
"learning_rate": 2.1000120206755623e-05, |
|
"loss": 0.0562, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.40576568245887756, |
|
"learning_rate": 2.0699603317706455e-05, |
|
"loss": 0.0736, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_loss": 0.24542821943759918, |
|
"eval_na_accuracy": 0.831615149974823, |
|
"eval_ordinal_accuracy": 0.7053686380386353, |
|
"eval_ordinal_mae": 0.3381500244140625, |
|
"eval_runtime": 188.0054, |
|
"eval_samples_per_second": 23.803, |
|
"eval_steps_per_second": 2.979, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 2.147202968597412, |
|
"learning_rate": 2.039908642865729e-05, |
|
"loss": 0.0552, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 1.0888266563415527, |
|
"learning_rate": 2.009856953960813e-05, |
|
"loss": 0.0598, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.6044988036155701, |
|
"learning_rate": 1.9798052650558964e-05, |
|
"loss": 0.039, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 0.5873610973358154, |
|
"learning_rate": 1.9497535761509796e-05, |
|
"loss": 0.0421, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_loss": 0.25130367279052734, |
|
"eval_na_accuracy": 0.831615149974823, |
|
"eval_ordinal_accuracy": 0.712047278881073, |
|
"eval_ordinal_mae": 0.3393591046333313, |
|
"eval_runtime": 185.2317, |
|
"eval_samples_per_second": 24.159, |
|
"eval_steps_per_second": 3.023, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 0.5890676975250244, |
|
"learning_rate": 1.9197018872460634e-05, |
|
"loss": 0.0717, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 0.3779636025428772, |
|
"learning_rate": 1.889650198341147e-05, |
|
"loss": 0.0559, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 0.38733378052711487, |
|
"learning_rate": 1.8595985094362305e-05, |
|
"loss": 0.0458, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 0.6435078978538513, |
|
"learning_rate": 1.829546820531314e-05, |
|
"loss": 0.0607, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_loss": 0.2546432614326477, |
|
"eval_na_accuracy": 0.8264604806900024, |
|
"eval_ordinal_accuracy": 0.7092216610908508, |
|
"eval_ordinal_mae": 0.33696386218070984, |
|
"eval_runtime": 185.2223, |
|
"eval_samples_per_second": 24.16, |
|
"eval_steps_per_second": 3.023, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.34649285674095154, |
|
"learning_rate": 1.7994951316263975e-05, |
|
"loss": 0.0459, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 0.7860547304153442, |
|
"learning_rate": 1.769443442721481e-05, |
|
"loss": 0.0483, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.4681091904640198, |
|
"learning_rate": 1.7393917538165645e-05, |
|
"loss": 0.0481, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 1.788452386856079, |
|
"learning_rate": 1.709340064911648e-05, |
|
"loss": 0.0517, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 0.25944724678993225, |
|
"eval_na_accuracy": 0.8298969268798828, |
|
"eval_ordinal_accuracy": 0.7081941962242126, |
|
"eval_ordinal_mae": 0.3375920057296753, |
|
"eval_runtime": 187.8946, |
|
"eval_samples_per_second": 23.817, |
|
"eval_steps_per_second": 2.98, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.35276201367378235, |
|
"learning_rate": 1.6792883760067316e-05, |
|
"loss": 0.0602, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.607291042804718, |
|
"learning_rate": 1.6492366871018154e-05, |
|
"loss": 0.0426, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.29660192131996155, |
|
"learning_rate": 1.6191849981968986e-05, |
|
"loss": 0.0499, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.6227554082870483, |
|
"learning_rate": 1.589133309291982e-05, |
|
"loss": 0.062, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_loss": 0.2532944679260254, |
|
"eval_na_accuracy": 0.8109965920448303, |
|
"eval_ordinal_accuracy": 0.710506021976471, |
|
"eval_ordinal_mae": 0.33686304092407227, |
|
"eval_runtime": 184.4841, |
|
"eval_samples_per_second": 24.257, |
|
"eval_steps_per_second": 3.035, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 0.37592121958732605, |
|
"learning_rate": 1.559081620387066e-05, |
|
"loss": 0.0529, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.7044575810432434, |
|
"learning_rate": 1.5290299314821495e-05, |
|
"loss": 0.0555, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 0.47716042399406433, |
|
"learning_rate": 1.4989782425772328e-05, |
|
"loss": 0.0424, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.6846262812614441, |
|
"learning_rate": 1.4689265536723165e-05, |
|
"loss": 0.0664, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 0.2534230649471283, |
|
"eval_na_accuracy": 0.8024054765701294, |
|
"eval_ordinal_accuracy": 0.7184690237045288, |
|
"eval_ordinal_mae": 0.33289140462875366, |
|
"eval_runtime": 185.2999, |
|
"eval_samples_per_second": 24.15, |
|
"eval_steps_per_second": 3.022, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 0.8216082453727722, |
|
"learning_rate": 1.4388748647674e-05, |
|
"loss": 0.0481, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 0.8088984489440918, |
|
"learning_rate": 1.4088231758624834e-05, |
|
"loss": 0.0575, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 0.7712150812149048, |
|
"learning_rate": 1.3787714869575672e-05, |
|
"loss": 0.0464, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 0.2501738369464874, |
|
"learning_rate": 1.3487197980526506e-05, |
|
"loss": 0.0389, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 0.246970534324646, |
|
"eval_na_accuracy": 0.8092783689498901, |
|
"eval_ordinal_accuracy": 0.7259182929992676, |
|
"eval_ordinal_mae": 0.328827440738678, |
|
"eval_runtime": 185.0735, |
|
"eval_samples_per_second": 24.18, |
|
"eval_steps_per_second": 3.026, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 0.6835588216781616, |
|
"learning_rate": 1.318668109147734e-05, |
|
"loss": 0.0475, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 0.5996441841125488, |
|
"learning_rate": 1.2886164202428178e-05, |
|
"loss": 0.0443, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 2.0018677711486816, |
|
"learning_rate": 1.2585647313379013e-05, |
|
"loss": 0.0425, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 0.300843209028244, |
|
"learning_rate": 1.2285130424329848e-05, |
|
"loss": 0.0671, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 0.25160375237464905, |
|
"eval_na_accuracy": 0.8041236996650696, |
|
"eval_ordinal_accuracy": 0.7159003615379333, |
|
"eval_ordinal_mae": 0.3293640613555908, |
|
"eval_runtime": 186.9929, |
|
"eval_samples_per_second": 23.931, |
|
"eval_steps_per_second": 2.995, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 0.677975058555603, |
|
"learning_rate": 1.1984613535280683e-05, |
|
"loss": 0.0438, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.45172086358070374, |
|
"learning_rate": 1.1684096646231518e-05, |
|
"loss": 0.0503, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 0.8617995381355286, |
|
"learning_rate": 1.1383579757182353e-05, |
|
"loss": 0.0426, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 0.47193852066993713, |
|
"learning_rate": 1.108306286813319e-05, |
|
"loss": 0.0416, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 0.25071558356285095, |
|
"eval_na_accuracy": 0.8058419227600098, |
|
"eval_ordinal_accuracy": 0.7133316397666931, |
|
"eval_ordinal_mae": 0.33067333698272705, |
|
"eval_runtime": 187.7549, |
|
"eval_samples_per_second": 23.834, |
|
"eval_steps_per_second": 2.983, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 1.0205371379852295, |
|
"learning_rate": 1.0782545979084024e-05, |
|
"loss": 0.0495, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"grad_norm": 0.8885800838470459, |
|
"learning_rate": 1.048202909003486e-05, |
|
"loss": 0.0407, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 0.6127232313156128, |
|
"learning_rate": 1.0181512200985696e-05, |
|
"loss": 0.0611, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 0.7351526021957397, |
|
"learning_rate": 9.880995311936533e-06, |
|
"loss": 0.0541, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 0.25292566418647766, |
|
"eval_na_accuracy": 0.8058419227600098, |
|
"eval_ordinal_accuracy": 0.71101975440979, |
|
"eval_ordinal_mae": 0.335502564907074, |
|
"eval_runtime": 185.7438, |
|
"eval_samples_per_second": 24.092, |
|
"eval_steps_per_second": 3.015, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 0.715017557144165, |
|
"learning_rate": 9.580478422887366e-06, |
|
"loss": 0.0581, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 0.2852751910686493, |
|
"learning_rate": 9.279961533838203e-06, |
|
"loss": 0.0507, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 0.5301045775413513, |
|
"learning_rate": 8.979444644789038e-06, |
|
"loss": 0.0427, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 0.3962666094303131, |
|
"learning_rate": 8.678927755739873e-06, |
|
"loss": 0.0374, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_loss": 0.2529982626438141, |
|
"eval_na_accuracy": 0.8109965920448303, |
|
"eval_ordinal_accuracy": 0.7148728370666504, |
|
"eval_ordinal_mae": 0.33152100443840027, |
|
"eval_runtime": 184.9088, |
|
"eval_samples_per_second": 24.201, |
|
"eval_steps_per_second": 3.029, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 0.5724599957466125, |
|
"learning_rate": 8.378410866690708e-06, |
|
"loss": 0.0383, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 0.6337906718254089, |
|
"learning_rate": 8.077893977641545e-06, |
|
"loss": 0.0339, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 0.645459771156311, |
|
"learning_rate": 7.777377088592379e-06, |
|
"loss": 0.0485, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 5.039384841918945, |
|
"learning_rate": 7.476860199543215e-06, |
|
"loss": 0.04, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_loss": 0.2519625723361969, |
|
"eval_na_accuracy": 0.80756014585495, |
|
"eval_ordinal_accuracy": 0.7166709303855896, |
|
"eval_ordinal_mae": 0.32904064655303955, |
|
"eval_runtime": 188.802, |
|
"eval_samples_per_second": 23.702, |
|
"eval_steps_per_second": 2.966, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 0.30601269006729126, |
|
"learning_rate": 7.176343310494051e-06, |
|
"loss": 0.053, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 0.6143732070922852, |
|
"learning_rate": 6.875826421444885e-06, |
|
"loss": 0.0433, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.5583890080451965, |
|
"learning_rate": 6.575309532395721e-06, |
|
"loss": 0.0534, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 0.5204710960388184, |
|
"learning_rate": 6.274792643346557e-06, |
|
"loss": 0.0507, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_loss": 0.2555212676525116, |
|
"eval_na_accuracy": 0.8127147555351257, |
|
"eval_ordinal_accuracy": 0.710506021976471, |
|
"eval_ordinal_mae": 0.3297020196914673, |
|
"eval_runtime": 185.0982, |
|
"eval_samples_per_second": 24.176, |
|
"eval_steps_per_second": 3.025, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 0.11941999197006226, |
|
"learning_rate": 5.974275754297392e-06, |
|
"loss": 0.0392, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 0.20734691619873047, |
|
"learning_rate": 5.673758865248227e-06, |
|
"loss": 0.06, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 0.48085981607437134, |
|
"learning_rate": 5.373241976199063e-06, |
|
"loss": 0.0502, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 0.5097972750663757, |
|
"learning_rate": 5.072725087149898e-06, |
|
"loss": 0.0379, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_loss": 0.2531285285949707, |
|
"eval_na_accuracy": 0.8127147555351257, |
|
"eval_ordinal_accuracy": 0.7161571979522705, |
|
"eval_ordinal_mae": 0.3273853659629822, |
|
"eval_runtime": 183.7697, |
|
"eval_samples_per_second": 24.351, |
|
"eval_steps_per_second": 3.047, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 0.1783066987991333, |
|
"learning_rate": 4.7722081981007335e-06, |
|
"loss": 0.0455, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 0.6303412318229675, |
|
"learning_rate": 4.4716913090515695e-06, |
|
"loss": 0.0639, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 0.7201049327850342, |
|
"learning_rate": 4.171174420002405e-06, |
|
"loss": 0.0373, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 0.31524857878685, |
|
"learning_rate": 3.87065753095324e-06, |
|
"loss": 0.0736, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_loss": 0.25256654620170593, |
|
"eval_na_accuracy": 0.8195876479148865, |
|
"eval_ordinal_accuracy": 0.7164140939712524, |
|
"eval_ordinal_mae": 0.3279343843460083, |
|
"eval_runtime": 187.2067, |
|
"eval_samples_per_second": 23.904, |
|
"eval_steps_per_second": 2.991, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 0.1482267528772354, |
|
"learning_rate": 3.5701406419040754e-06, |
|
"loss": 0.0363, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 0.6438891887664795, |
|
"learning_rate": 3.2696237528549105e-06, |
|
"loss": 0.043, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 0.5657501816749573, |
|
"learning_rate": 2.969106863805746e-06, |
|
"loss": 0.0351, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 0.35640329122543335, |
|
"learning_rate": 2.6685899747565817e-06, |
|
"loss": 0.0589, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_loss": 0.2521790862083435, |
|
"eval_na_accuracy": 0.8161512017250061, |
|
"eval_ordinal_accuracy": 0.7143591046333313, |
|
"eval_ordinal_mae": 0.3267035186290741, |
|
"eval_runtime": 185.6368, |
|
"eval_samples_per_second": 24.106, |
|
"eval_steps_per_second": 3.017, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 0.6037762761116028, |
|
"learning_rate": 2.368073085707417e-06, |
|
"loss": 0.052, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 0.5858399868011475, |
|
"learning_rate": 2.067556196658252e-06, |
|
"loss": 0.043, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 0.9954311847686768, |
|
"learning_rate": 1.7670393076090878e-06, |
|
"loss": 0.0708, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 0.448538601398468, |
|
"learning_rate": 1.4665224185599232e-06, |
|
"loss": 0.0449, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 0.252143532037735, |
|
"eval_na_accuracy": 0.8161512017250061, |
|
"eval_ordinal_accuracy": 0.7148728370666504, |
|
"eval_ordinal_mae": 0.3271646499633789, |
|
"eval_runtime": 188.3236, |
|
"eval_samples_per_second": 23.762, |
|
"eval_steps_per_second": 2.974, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 0.3916318416595459, |
|
"learning_rate": 1.1660055295107585e-06, |
|
"loss": 0.0519, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 0.7591487765312195, |
|
"learning_rate": 8.654886404615941e-07, |
|
"loss": 0.0447, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 0.33720019459724426, |
|
"learning_rate": 5.649717514124295e-07, |
|
"loss": 0.0486, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 0.5342977046966553, |
|
"learning_rate": 2.644548623632648e-07, |
|
"loss": 0.0498, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 0.2520281970500946, |
|
"eval_na_accuracy": 0.8144329786300659, |
|
"eval_ordinal_accuracy": 0.7166709303855896, |
|
"eval_ordinal_mae": 0.3264659643173218, |
|
"eval_runtime": 186.96, |
|
"eval_samples_per_second": 23.936, |
|
"eval_steps_per_second": 2.995, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 8319, |
|
"total_flos": 1.0314863567841853e+19, |
|
"train_loss": 0.13915026724546314, |
|
"train_runtime": 27077.9759, |
|
"train_samples_per_second": 4.916, |
|
"train_steps_per_second": 0.307 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 8319, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 1.0314863567841853e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
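For readers who want to inspect a state file like this programmatically, the snippet below is a minimal sketch: it loads the JSON, keeps only the evaluation records in log_history, and reports the steps with the lowest eval_loss and eval_ordinal_mae. The filename trainer_state.json is an assumption; the field names match the entries logged above.

# Minimal sketch: summarize evaluation metrics from a Trainer state file.
# Assumption: the log above was saved as "trainer_state.json" in the output dir.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Evaluation records are the log_history entries that carry an "eval_loss" key.
evals = [entry for entry in state["log_history"] if "eval_loss" in entry]

# Report the steps with the lowest eval_loss and the lowest eval_ordinal_mae.
best_loss = min(evals, key=lambda e: e["eval_loss"])
best_mae = min(evals, key=lambda e: e["eval_ordinal_mae"])

print(f"lowest eval_loss {best_loss['eval_loss']:.4f} at step {best_loss['step']}")
print(f"lowest eval_ordinal_mae {best_mae['eval_ordinal_mae']:.4f} at step {best_mae['step']}")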