|
{ |
|
"best_metric": 0.16356664896011353, |
|
"best_model_checkpoint": "./vit-indian-food/checkpoint-500", |
|
"epoch": 2.6178010471204187, |
|
"eval_steps": 50, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.967112064361572, |
|
"learning_rate": 0.00019895287958115185, |
|
"loss": 2.3423, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.768218517303467, |
|
"learning_rate": 0.00019790575916230367, |
|
"loss": 1.5082, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.283707141876221, |
|
"learning_rate": 0.0001968586387434555, |
|
"loss": 1.1555, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.167559623718262, |
|
"learning_rate": 0.00019581151832460733, |
|
"loss": 0.7045, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 6.519095420837402, |
|
"learning_rate": 0.00019476439790575917, |
|
"loss": 0.5942, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 4.0483479499816895, |
|
"learning_rate": 0.000193717277486911, |
|
"loss": 0.45, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 4.359745025634766, |
|
"learning_rate": 0.00019267015706806283, |
|
"loss": 0.3833, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 5.201784610748291, |
|
"learning_rate": 0.00019162303664921465, |
|
"loss": 0.3702, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 4.259886741638184, |
|
"learning_rate": 0.0001905759162303665, |
|
"loss": 0.3227, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 4.89500617980957, |
|
"learning_rate": 0.00018952879581151833, |
|
"loss": 0.2368, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.9002624671916011, |
|
"eval_loss": 0.33144867420196533, |
|
"eval_precision": 0.9131344269428404, |
|
"eval_recall": 0.9002624671916011, |
|
"eval_runtime": 17.436, |
|
"eval_samples_per_second": 43.703, |
|
"eval_steps_per_second": 2.753, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 4.650336265563965, |
|
"learning_rate": 0.00018848167539267018, |
|
"loss": 0.4139, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 4.881960391998291, |
|
"learning_rate": 0.00018743455497382202, |
|
"loss": 0.2109, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 5.3574066162109375, |
|
"learning_rate": 0.00018638743455497384, |
|
"loss": 0.3076, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 5.008607864379883, |
|
"learning_rate": 0.00018534031413612568, |
|
"loss": 0.2632, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 4.561398506164551, |
|
"learning_rate": 0.0001842931937172775, |
|
"loss": 0.2062, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 5.034663677215576, |
|
"learning_rate": 0.00018324607329842934, |
|
"loss": 0.4752, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 8.143318176269531, |
|
"learning_rate": 0.00018219895287958115, |
|
"loss": 0.3963, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 6.556373119354248, |
|
"learning_rate": 0.000181151832460733, |
|
"loss": 0.5088, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 4.414945125579834, |
|
"learning_rate": 0.0001801047120418848, |
|
"loss": 0.2903, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.464193820953369, |
|
"learning_rate": 0.00017905759162303666, |
|
"loss": 0.2801, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.9422572178477691, |
|
"eval_loss": 0.18048594892024994, |
|
"eval_precision": 0.9466471604125616, |
|
"eval_recall": 0.9422572178477691, |
|
"eval_runtime": 17.2463, |
|
"eval_samples_per_second": 44.183, |
|
"eval_steps_per_second": 2.783, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 5.096113681793213, |
|
"learning_rate": 0.0001780104712041885, |
|
"loss": 0.213, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.4251294136047363, |
|
"learning_rate": 0.00017696335078534032, |
|
"loss": 0.1831, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 3.766965627670288, |
|
"learning_rate": 0.00017591623036649216, |
|
"loss": 0.1636, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 3.66446852684021, |
|
"learning_rate": 0.00017486910994764398, |
|
"loss": 0.3075, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 4.702181339263916, |
|
"learning_rate": 0.00017382198952879582, |
|
"loss": 0.2472, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 8.463685035705566, |
|
"learning_rate": 0.00017277486910994763, |
|
"loss": 0.4341, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 5.947971343994141, |
|
"learning_rate": 0.00017172774869109948, |
|
"loss": 0.2291, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 5.682934761047363, |
|
"learning_rate": 0.00017068062827225132, |
|
"loss": 0.1632, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 9.100614547729492, |
|
"learning_rate": 0.00016963350785340316, |
|
"loss": 0.307, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 4.207605361938477, |
|
"learning_rate": 0.00016858638743455498, |
|
"loss": 0.2362, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.9186351706036745, |
|
"eval_loss": 0.29145774245262146, |
|
"eval_precision": 0.9273784378254858, |
|
"eval_recall": 0.9186351706036745, |
|
"eval_runtime": 17.4226, |
|
"eval_samples_per_second": 43.736, |
|
"eval_steps_per_second": 2.755, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 3.4303929805755615, |
|
"learning_rate": 0.00016753926701570682, |
|
"loss": 0.074, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 4.852244853973389, |
|
"learning_rate": 0.00016649214659685867, |
|
"loss": 0.2731, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.3142894506454468, |
|
"learning_rate": 0.00016544502617801048, |
|
"loss": 0.1295, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 3.892892360687256, |
|
"learning_rate": 0.00016439790575916233, |
|
"loss": 0.225, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 4.833043575286865, |
|
"learning_rate": 0.00016335078534031414, |
|
"loss": 0.2674, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 5.150189399719238, |
|
"learning_rate": 0.00016230366492146599, |
|
"loss": 0.241, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 5.426727771759033, |
|
"learning_rate": 0.0001612565445026178, |
|
"loss": 0.2049, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 5.2064738273620605, |
|
"learning_rate": 0.00016020942408376964, |
|
"loss": 0.1756, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 2.034318685531616, |
|
"learning_rate": 0.00015916230366492146, |
|
"loss": 0.0499, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 4.4703145027160645, |
|
"learning_rate": 0.0001581151832460733, |
|
"loss": 0.042, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_accuracy": 0.9461942257217848, |
|
"eval_loss": 0.16130226850509644, |
|
"eval_precision": 0.9511635531837818, |
|
"eval_recall": 0.9461942257217848, |
|
"eval_runtime": 17.3623, |
|
"eval_samples_per_second": 43.888, |
|
"eval_steps_per_second": 2.765, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.06716866791248322, |
|
"learning_rate": 0.00015706806282722515, |
|
"loss": 0.0643, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 7.002842903137207, |
|
"learning_rate": 0.00015602094240837696, |
|
"loss": 0.1028, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.013943655416369438, |
|
"learning_rate": 0.0001549738219895288, |
|
"loss": 0.0941, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.42214348912239075, |
|
"learning_rate": 0.00015392670157068062, |
|
"loss": 0.0273, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 8.428805351257324, |
|
"learning_rate": 0.00015287958115183247, |
|
"loss": 0.0589, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 2.468398094177246, |
|
"learning_rate": 0.00015183246073298428, |
|
"loss": 0.0321, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.8661732077598572, |
|
"learning_rate": 0.00015078534031413612, |
|
"loss": 0.0856, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.4541776478290558, |
|
"learning_rate": 0.00014973821989528797, |
|
"loss": 0.0136, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 4.832011699676514, |
|
"learning_rate": 0.0001486910994764398, |
|
"loss": 0.1087, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 4.744746685028076, |
|
"learning_rate": 0.00014764397905759163, |
|
"loss": 0.0477, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_accuracy": 0.9514435695538058, |
|
"eval_loss": 0.14496225118637085, |
|
"eval_precision": 0.9534067355318784, |
|
"eval_recall": 0.9514435695538058, |
|
"eval_runtime": 17.4463, |
|
"eval_samples_per_second": 43.677, |
|
"eval_steps_per_second": 2.751, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.053365129977464676, |
|
"learning_rate": 0.00014659685863874347, |
|
"loss": 0.0171, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.41375842690467834, |
|
"learning_rate": 0.00014554973821989531, |
|
"loss": 0.1189, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.05034720525145531, |
|
"learning_rate": 0.00014450261780104713, |
|
"loss": 0.0377, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 2.498413324356079, |
|
"learning_rate": 0.00014345549738219897, |
|
"loss": 0.0334, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.06190189719200134, |
|
"learning_rate": 0.0001424083769633508, |
|
"loss": 0.0158, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 1.9862781763076782, |
|
"learning_rate": 0.00014136125654450263, |
|
"loss": 0.0186, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.047805577516555786, |
|
"learning_rate": 0.00014031413612565445, |
|
"loss": 0.0705, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.03765374794602394, |
|
"learning_rate": 0.0001392670157068063, |
|
"loss": 0.1822, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 5.020593643188477, |
|
"learning_rate": 0.0001382198952879581, |
|
"loss": 0.0375, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 1.0980048179626465, |
|
"learning_rate": 0.00013717277486910995, |
|
"loss": 0.1297, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_accuracy": 0.937007874015748, |
|
"eval_loss": 0.24536970257759094, |
|
"eval_precision": 0.9442846541005356, |
|
"eval_recall": 0.937007874015748, |
|
"eval_runtime": 17.5121, |
|
"eval_samples_per_second": 43.513, |
|
"eval_steps_per_second": 2.741, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 3.535846471786499, |
|
"learning_rate": 0.0001361256544502618, |
|
"loss": 0.1071, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 4.182868957519531, |
|
"learning_rate": 0.0001350785340314136, |
|
"loss": 0.0249, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.36884206533432007, |
|
"learning_rate": 0.00013403141361256545, |
|
"loss": 0.0762, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 5.14253044128418, |
|
"learning_rate": 0.00013298429319371727, |
|
"loss": 0.0766, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.2552727460861206, |
|
"learning_rate": 0.0001319371727748691, |
|
"loss": 0.0093, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 3.767280101776123, |
|
"learning_rate": 0.00013089005235602096, |
|
"loss": 0.2232, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.010386645793914795, |
|
"learning_rate": 0.00012984293193717277, |
|
"loss": 0.0026, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 4.292264461517334, |
|
"learning_rate": 0.00012879581151832462, |
|
"loss": 0.2457, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.024649567902088165, |
|
"learning_rate": 0.00012774869109947646, |
|
"loss": 0.0306, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 5.098654270172119, |
|
"learning_rate": 0.00012670157068062827, |
|
"loss": 0.1156, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.9501312335958005, |
|
"eval_loss": 0.17998863756656647, |
|
"eval_precision": 0.9531660720469141, |
|
"eval_recall": 0.9501312335958005, |
|
"eval_runtime": 17.4136, |
|
"eval_samples_per_second": 43.759, |
|
"eval_steps_per_second": 2.756, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.9651967287063599, |
|
"learning_rate": 0.00012565445026178012, |
|
"loss": 0.0618, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 2.727735996246338, |
|
"learning_rate": 0.00012460732984293196, |
|
"loss": 0.0705, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 4.953824043273926, |
|
"learning_rate": 0.00012356020942408378, |
|
"loss": 0.2112, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.08269181102514267, |
|
"learning_rate": 0.00012251308900523562, |
|
"loss": 0.071, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.06396706402301788, |
|
"learning_rate": 0.00012146596858638744, |
|
"loss": 0.1069, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.28295230865478516, |
|
"learning_rate": 0.00012041884816753928, |
|
"loss": 0.0759, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 1.5805819034576416, |
|
"learning_rate": 0.0001193717277486911, |
|
"loss": 0.0622, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 2.3917598724365234, |
|
"learning_rate": 0.00011832460732984294, |
|
"loss": 0.0512, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.03971586003899574, |
|
"learning_rate": 0.00011727748691099475, |
|
"loss": 0.0163, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.024471383541822433, |
|
"learning_rate": 0.0001162303664921466, |
|
"loss": 0.0188, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_accuracy": 0.9593175853018373, |
|
"eval_loss": 0.14536795020103455, |
|
"eval_precision": 0.9601421810596481, |
|
"eval_recall": 0.9593175853018373, |
|
"eval_runtime": 17.4, |
|
"eval_samples_per_second": 43.793, |
|
"eval_steps_per_second": 2.759, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.0705149918794632, |
|
"learning_rate": 0.00011518324607329844, |
|
"loss": 0.0062, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.013097544200718403, |
|
"learning_rate": 0.00011413612565445027, |
|
"loss": 0.0191, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.5236541628837585, |
|
"learning_rate": 0.00011308900523560211, |
|
"loss": 0.0403, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.01967461034655571, |
|
"learning_rate": 0.00011204188481675393, |
|
"loss": 0.0032, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.02198900654911995, |
|
"learning_rate": 0.00011099476439790577, |
|
"loss": 0.0019, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.14719614386558533, |
|
"learning_rate": 0.00010994764397905759, |
|
"loss": 0.0052, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.021793629974126816, |
|
"learning_rate": 0.00010890052356020943, |
|
"loss": 0.0105, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 0.02368028089404106, |
|
"learning_rate": 0.00010785340314136125, |
|
"loss": 0.001, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 0.010441082529723644, |
|
"learning_rate": 0.00010680628272251309, |
|
"loss": 0.0019, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 0.015402965247631073, |
|
"learning_rate": 0.00010575916230366492, |
|
"loss": 0.0049, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.9711286089238845, |
|
"eval_loss": 0.1082761362195015, |
|
"eval_precision": 0.9714506772867647, |
|
"eval_recall": 0.9711286089238845, |
|
"eval_runtime": 17.6808, |
|
"eval_samples_per_second": 43.098, |
|
"eval_steps_per_second": 2.715, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.07379986345767975, |
|
"learning_rate": 0.00010471204188481676, |
|
"loss": 0.0047, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 5.459177494049072, |
|
"learning_rate": 0.0001036649214659686, |
|
"loss": 0.0192, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 0.004949438851326704, |
|
"learning_rate": 0.00010261780104712042, |
|
"loss": 0.0148, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.17624743282794952, |
|
"learning_rate": 0.00010157068062827227, |
|
"loss": 0.0087, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 0.010408706963062286, |
|
"learning_rate": 0.00010052356020942408, |
|
"loss": 0.0018, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.007278731558471918, |
|
"learning_rate": 9.947643979057593e-05, |
|
"loss": 0.0029, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.01558419968932867, |
|
"learning_rate": 9.842931937172776e-05, |
|
"loss": 0.0549, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 0.005123753100633621, |
|
"learning_rate": 9.738219895287959e-05, |
|
"loss": 0.0006, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 0.011126354336738586, |
|
"learning_rate": 9.633507853403142e-05, |
|
"loss": 0.003, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 0.02012033388018608, |
|
"learning_rate": 9.528795811518324e-05, |
|
"loss": 0.0139, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_accuracy": 0.963254593175853, |
|
"eval_loss": 0.16356664896011353, |
|
"eval_precision": 0.9644146563432678, |
|
"eval_recall": 0.963254593175853, |
|
"eval_runtime": 17.6855, |
|
"eval_samples_per_second": 43.086, |
|
"eval_steps_per_second": 2.714, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 955, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 6.186131303461724e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|