{
  "best_metric": 0.15805459022521973,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.0835421888053467,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.000835421888053467,
      "grad_norm": 7.261774063110352,
      "learning_rate": 1e-06,
      "loss": 1.5114,
      "step": 1
    },
    {
      "epoch": 0.000835421888053467,
      "eval_loss": 1.6066429615020752,
      "eval_runtime": 38.4253,
      "eval_samples_per_second": 13.116,
      "eval_steps_per_second": 3.279,
      "step": 1
    },
    {
      "epoch": 0.001670843776106934,
      "grad_norm": 17.708011627197266,
      "learning_rate": 2e-06,
      "loss": 1.5639,
      "step": 2
    },
    {
      "epoch": 0.002506265664160401,
      "grad_norm": 19.94588851928711,
      "learning_rate": 3e-06,
      "loss": 2.2871,
      "step": 3
    },
    {
      "epoch": 0.003341687552213868,
      "grad_norm": 11.027591705322266,
      "learning_rate": 4e-06,
      "loss": 1.1737,
      "step": 4
    },
    {
      "epoch": 0.004177109440267335,
      "grad_norm": 19.168018341064453,
      "learning_rate": 4.9999999999999996e-06,
      "loss": 1.6066,
      "step": 5
    },
    {
      "epoch": 0.005012531328320802,
      "grad_norm": 11.331216812133789,
      "learning_rate": 6e-06,
      "loss": 1.7864,
      "step": 6
    },
    {
      "epoch": 0.005847953216374269,
      "grad_norm": 12.50044059753418,
      "learning_rate": 7e-06,
      "loss": 1.3554,
      "step": 7
    },
    {
      "epoch": 0.006683375104427736,
      "grad_norm": 11.597599983215332,
      "learning_rate": 8e-06,
      "loss": 1.2366,
      "step": 8
    },
    {
      "epoch": 0.007518796992481203,
      "grad_norm": 5.6630682945251465,
      "learning_rate": 9e-06,
      "loss": 0.9487,
      "step": 9
    },
    {
      "epoch": 0.00835421888053467,
      "grad_norm": 5.019942283630371,
      "learning_rate": 9.999999999999999e-06,
      "loss": 1.0092,
      "step": 10
    },
    {
      "epoch": 0.009189640768588136,
      "grad_norm": 8.010810852050781,
      "learning_rate": 1.1e-05,
      "loss": 1.4717,
      "step": 11
    },
    {
      "epoch": 0.010025062656641603,
      "grad_norm": 6.473798751831055,
      "learning_rate": 1.2e-05,
      "loss": 0.8519,
      "step": 12
    },
    {
      "epoch": 0.01086048454469507,
      "grad_norm": 4.7110595703125,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 0.7024,
      "step": 13
    },
    {
      "epoch": 0.011695906432748537,
      "grad_norm": 4.263626575469971,
      "learning_rate": 1.4e-05,
      "loss": 0.7116,
      "step": 14
    },
    {
      "epoch": 0.012531328320802004,
      "grad_norm": 3.472412586212158,
      "learning_rate": 1.5e-05,
      "loss": 0.5141,
      "step": 15
    },
    {
      "epoch": 0.013366750208855471,
      "grad_norm": 3.362398624420166,
      "learning_rate": 1.6e-05,
      "loss": 0.6892,
      "step": 16
    },
    {
      "epoch": 0.014202172096908938,
      "grad_norm": 2.528292179107666,
      "learning_rate": 1.7e-05,
      "loss": 0.3903,
      "step": 17
    },
    {
      "epoch": 0.015037593984962405,
      "grad_norm": 1.7493354082107544,
      "learning_rate": 1.8e-05,
      "loss": 0.2833,
      "step": 18
    },
    {
      "epoch": 0.015873015873015872,
      "grad_norm": 1.5588998794555664,
      "learning_rate": 1.9e-05,
      "loss": 0.246,
      "step": 19
    },
    {
      "epoch": 0.01670843776106934,
      "grad_norm": 1.64949369430542,
      "learning_rate": 1.9999999999999998e-05,
      "loss": 0.2675,
      "step": 20
    },
    {
      "epoch": 0.017543859649122806,
      "grad_norm": 0.8435382843017578,
      "learning_rate": 2.1e-05,
      "loss": 0.1553,
      "step": 21
    },
    {
      "epoch": 0.018379281537176273,
      "grad_norm": 0.6495569348335266,
      "learning_rate": 2.2e-05,
      "loss": 0.1196,
      "step": 22
    },
    {
      "epoch": 0.01921470342522974,
      "grad_norm": 1.0182844400405884,
      "learning_rate": 2.3000000000000003e-05,
      "loss": 0.1567,
      "step": 23
    },
    {
      "epoch": 0.020050125313283207,
      "grad_norm": 2.8188316822052,
      "learning_rate": 2.4e-05,
      "loss": 0.2064,
      "step": 24
    },
    {
      "epoch": 0.020885547201336674,
      "grad_norm": 1.613542914390564,
      "learning_rate": 2.5e-05,
      "loss": 0.173,
      "step": 25
    },
    {
      "epoch": 0.02172096908939014,
      "grad_norm": 1.5958133935928345,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 0.184,
      "step": 26
    },
    {
      "epoch": 0.022556390977443608,
      "grad_norm": 0.7841217517852783,
      "learning_rate": 2.7000000000000002e-05,
      "loss": 0.1614,
      "step": 27
    },
    {
      "epoch": 0.023391812865497075,
      "grad_norm": 1.057112455368042,
      "learning_rate": 2.8e-05,
      "loss": 0.1174,
      "step": 28
    },
    {
      "epoch": 0.02422723475355054,
      "grad_norm": 2.9374876022338867,
      "learning_rate": 2.9e-05,
      "loss": 0.4939,
      "step": 29
    },
    {
      "epoch": 0.02506265664160401,
      "grad_norm": 0.8594872355461121,
      "learning_rate": 3e-05,
      "loss": 0.1437,
      "step": 30
    },
    {
      "epoch": 0.025898078529657476,
      "grad_norm": 0.6192961931228638,
      "learning_rate": 2.9984895998119723e-05,
      "loss": 0.1466,
      "step": 31
    },
    {
      "epoch": 0.026733500417710943,
      "grad_norm": 1.5436159372329712,
      "learning_rate": 2.993961440992859e-05,
      "loss": 0.2683,
      "step": 32
    },
    {
      "epoch": 0.02756892230576441,
      "grad_norm": 0.42387092113494873,
      "learning_rate": 2.9864246426519023e-05,
      "loss": 0.1387,
      "step": 33
    },
    {
      "epoch": 0.028404344193817876,
      "grad_norm": 3.636671304702759,
      "learning_rate": 2.9758943828979444e-05,
      "loss": 0.2699,
      "step": 34
    },
    {
      "epoch": 0.029239766081871343,
      "grad_norm": 0.8583257794380188,
      "learning_rate": 2.9623918682727355e-05,
      "loss": 0.1803,
      "step": 35
    },
    {
      "epoch": 0.03007518796992481,
      "grad_norm": 1.826141595840454,
      "learning_rate": 2.9459442910437798e-05,
      "loss": 0.3046,
      "step": 36
    },
    {
      "epoch": 0.030910609857978277,
      "grad_norm": 0.7456461787223816,
      "learning_rate": 2.9265847744427305e-05,
      "loss": 0.1901,
      "step": 37
    },
    {
      "epoch": 0.031746031746031744,
      "grad_norm": 0.8312434554100037,
      "learning_rate": 2.904352305959606e-05,
      "loss": 0.2098,
      "step": 38
    },
    {
      "epoch": 0.03258145363408521,
      "grad_norm": 0.42538195848464966,
      "learning_rate": 2.8792916588271762e-05,
      "loss": 0.1526,
      "step": 39
    },
    {
      "epoch": 0.03341687552213868,
      "grad_norm": 0.3908769488334656,
      "learning_rate": 2.8514533018536286e-05,
      "loss": 0.1633,
      "step": 40
    },
    {
      "epoch": 0.034252297410192145,
      "grad_norm": 0.3974226713180542,
      "learning_rate": 2.820893297785107e-05,
      "loss": 0.1491,
      "step": 41
    },
    {
      "epoch": 0.03508771929824561,
      "grad_norm": 0.36495357751846313,
      "learning_rate": 2.7876731904027994e-05,
      "loss": 0.1951,
      "step": 42
    },
    {
      "epoch": 0.03592314118629908,
      "grad_norm": 0.560374915599823,
      "learning_rate": 2.7518598805819542e-05,
      "loss": 0.1859,
      "step": 43
    },
    {
      "epoch": 0.036758563074352546,
      "grad_norm": 0.3539278209209442,
      "learning_rate": 2.7135254915624213e-05,
      "loss": 0.15,
      "step": 44
    },
    {
      "epoch": 0.03759398496240601,
      "grad_norm": 1.142353892326355,
      "learning_rate": 2.672747223702045e-05,
      "loss": 0.174,
      "step": 45
    },
    {
      "epoch": 0.03842940685045948,
      "grad_norm": 0.7058913111686707,
      "learning_rate": 2.6296071990054167e-05,
      "loss": 0.1992,
      "step": 46
    },
    {
      "epoch": 0.03926482873851295,
      "grad_norm": 0.38486120104789734,
      "learning_rate": 2.5841922957410875e-05,
      "loss": 0.1652,
      "step": 47
    },
    {
      "epoch": 0.040100250626566414,
      "grad_norm": 3.6414639949798584,
      "learning_rate": 2.5365939734802973e-05,
      "loss": 0.4362,
      "step": 48
    },
    {
      "epoch": 0.04093567251461988,
      "grad_norm": 0.3310062885284424,
      "learning_rate": 2.4869080889095693e-05,
      "loss": 0.2132,
      "step": 49
    },
    {
      "epoch": 0.04177109440267335,
      "grad_norm": 0.39665135741233826,
      "learning_rate": 2.4352347027881003e-05,
      "loss": 0.2063,
      "step": 50
    },
    {
      "epoch": 0.04177109440267335,
      "eval_loss": 0.1654907464981079,
      "eval_runtime": 39.1179,
      "eval_samples_per_second": 12.884,
      "eval_steps_per_second": 3.221,
      "step": 50
    },
    {
      "epoch": 0.042606516290726815,
      "grad_norm": 0.3080545663833618,
      "learning_rate": 2.3816778784387097e-05,
      "loss": 0.1177,
      "step": 51
    },
    {
      "epoch": 0.04344193817878028,
      "grad_norm": 0.40985429286956787,
      "learning_rate": 2.3263454721781537e-05,
      "loss": 0.1433,
      "step": 52
    },
    {
      "epoch": 0.04427736006683375,
      "grad_norm": 0.15664848685264587,
      "learning_rate": 2.2693489161088592e-05,
      "loss": 0.0884,
      "step": 53
    },
    {
      "epoch": 0.045112781954887216,
      "grad_norm": 0.15345360338687897,
      "learning_rate": 2.210802993709498e-05,
      "loss": 0.1044,
      "step": 54
    },
    {
      "epoch": 0.04594820384294068,
      "grad_norm": 0.20342934131622314,
      "learning_rate": 2.1508256086763372e-05,
      "loss": 0.1088,
      "step": 55
    },
    {
      "epoch": 0.04678362573099415,
      "grad_norm": 0.16992981731891632,
      "learning_rate": 2.0895375474808857e-05,
      "loss": 0.1459,
      "step": 56
    },
    {
      "epoch": 0.047619047619047616,
      "grad_norm": 0.17031916975975037,
      "learning_rate": 2.0270622361220143e-05,
      "loss": 0.1282,
      "step": 57
    },
    {
      "epoch": 0.04845446950710108,
      "grad_norm": 0.2852511703968048,
      "learning_rate": 1.963525491562421e-05,
      "loss": 0.1329,
      "step": 58
    },
    {
      "epoch": 0.04928989139515455,
      "grad_norm": 0.4877017140388489,
      "learning_rate": 1.8990552683500128e-05,
      "loss": 0.1365,
      "step": 59
    },
    {
      "epoch": 0.05012531328320802,
      "grad_norm": 0.3902769684791565,
      "learning_rate": 1.8337814009344716e-05,
      "loss": 0.1584,
      "step": 60
    },
    {
      "epoch": 0.050960735171261484,
      "grad_norm": 0.2292199432849884,
      "learning_rate": 1.767835342197955e-05,
      "loss": 0.1402,
      "step": 61
    },
    {
      "epoch": 0.05179615705931495,
      "grad_norm": 0.2017497420310974,
      "learning_rate": 1.7013498987264832e-05,
      "loss": 0.1482,
      "step": 62
    },
    {
      "epoch": 0.05263157894736842,
      "grad_norm": 0.2939629852771759,
      "learning_rate": 1.6344589633551502e-05,
      "loss": 0.1461,
      "step": 63
    },
    {
      "epoch": 0.053467000835421885,
      "grad_norm": 0.1926645189523697,
      "learning_rate": 1.5672972455257726e-05,
      "loss": 0.1044,
      "step": 64
    },
    {
      "epoch": 0.05430242272347535,
      "grad_norm": 0.3081521987915039,
      "learning_rate": 1.5e-05,
      "loss": 0.1596,
      "step": 65
    },
    {
      "epoch": 0.05513784461152882,
      "grad_norm": 0.4025512933731079,
      "learning_rate": 1.4327027544742281e-05,
      "loss": 0.1735,
      "step": 66
    },
    {
      "epoch": 0.055973266499582286,
      "grad_norm": 0.20314837992191315,
      "learning_rate": 1.36554103664485e-05,
      "loss": 0.125,
      "step": 67
    },
    {
      "epoch": 0.05680868838763575,
      "grad_norm": 0.20744721591472626,
      "learning_rate": 1.2986501012735174e-05,
      "loss": 0.1095,
      "step": 68
    },
    {
      "epoch": 0.05764411027568922,
      "grad_norm": 0.2043089121580124,
      "learning_rate": 1.2321646578020452e-05,
      "loss": 0.1544,
      "step": 69
    },
    {
      "epoch": 0.05847953216374269,
      "grad_norm": 0.2829361855983734,
      "learning_rate": 1.1662185990655285e-05,
      "loss": 0.1347,
      "step": 70
    },
    {
      "epoch": 0.059314954051796154,
      "grad_norm": 0.2790044844150543,
      "learning_rate": 1.1009447316499875e-05,
      "loss": 0.1564,
      "step": 71
    },
    {
      "epoch": 0.06015037593984962,
      "grad_norm": 2.54337739944458,
      "learning_rate": 1.036474508437579e-05,
      "loss": 0.1629,
      "step": 72
    },
    {
      "epoch": 0.06098579782790309,
      "grad_norm": 0.279619425535202,
      "learning_rate": 9.729377638779859e-06,
      "loss": 0.1506,
      "step": 73
    },
    {
      "epoch": 0.061821219715956555,
      "grad_norm": 0.2148342877626419,
      "learning_rate": 9.104624525191147e-06,
      "loss": 0.1222,
      "step": 74
    },
    {
      "epoch": 0.06265664160401002,
      "grad_norm": 3.0420451164245605,
      "learning_rate": 8.491743913236629e-06,
      "loss": 0.1404,
      "step": 75
    },
    {
      "epoch": 0.06349206349206349,
      "grad_norm": 0.2432384490966797,
      "learning_rate": 7.89197006290502e-06,
      "loss": 0.1076,
      "step": 76
    },
    {
      "epoch": 0.06432748538011696,
      "grad_norm": 3.1877377033233643,
      "learning_rate": 7.30651083891141e-06,
      "loss": 0.152,
      "step": 77
    },
    {
      "epoch": 0.06516290726817042,
      "grad_norm": 0.4180755019187927,
      "learning_rate": 6.736545278218464e-06,
      "loss": 0.1127,
      "step": 78
    },
    {
      "epoch": 0.06599832915622389,
      "grad_norm": 0.25477147102355957,
      "learning_rate": 6.1832212156129045e-06,
      "loss": 0.1277,
      "step": 79
    },
    {
      "epoch": 0.06683375104427736,
      "grad_norm": 0.399618536233902,
      "learning_rate": 5.647652972118998e-06,
      "loss": 0.1418,
      "step": 80
    },
    {
      "epoch": 0.06766917293233082,
      "grad_norm": 0.27185890078544617,
      "learning_rate": 5.130919110904311e-06,
      "loss": 0.155,
      "step": 81
    },
    {
      "epoch": 0.06850459482038429,
      "grad_norm": 1.3203309774398804,
      "learning_rate": 4.6340602651970304e-06,
      "loss": 0.3033,
      "step": 82
    },
    {
      "epoch": 0.06934001670843776,
      "grad_norm": 0.3346821367740631,
      "learning_rate": 4.158077042589129e-06,
      "loss": 0.1639,
      "step": 83
    },
    {
      "epoch": 0.07017543859649122,
      "grad_norm": 0.3132845163345337,
      "learning_rate": 3.7039280099458373e-06,
      "loss": 0.1417,
      "step": 84
    },
    {
      "epoch": 0.07101086048454469,
      "grad_norm": 0.30599430203437805,
      "learning_rate": 3.272527762979553e-06,
      "loss": 0.1666,
      "step": 85
    },
    {
      "epoch": 0.07184628237259816,
      "grad_norm": 0.5425288677215576,
      "learning_rate": 2.86474508437579e-06,
      "loss": 0.1837,
      "step": 86
    },
    {
      "epoch": 0.07268170426065163,
      "grad_norm": 0.2609088122844696,
      "learning_rate": 2.4814011941804603e-06,
      "loss": 0.1981,
      "step": 87
    },
    {
      "epoch": 0.07351712614870509,
      "grad_norm": 0.2561860680580139,
      "learning_rate": 2.1232680959720085e-06,
      "loss": 0.1169,
      "step": 88
    },
    {
      "epoch": 0.07435254803675856,
      "grad_norm": 0.26870644092559814,
      "learning_rate": 1.79106702214893e-06,
      "loss": 0.162,
      "step": 89
    },
    {
      "epoch": 0.07518796992481203,
      "grad_norm": 0.35758742690086365,
      "learning_rate": 1.4854669814637145e-06,
      "loss": 0.1946,
      "step": 90
    },
    {
      "epoch": 0.07602339181286549,
      "grad_norm": 0.40720826387405396,
      "learning_rate": 1.2070834117282414e-06,
      "loss": 0.1676,
      "step": 91
    },
    {
      "epoch": 0.07685881370091896,
      "grad_norm": 0.2641846835613251,
      "learning_rate": 9.56476940403942e-07,
      "loss": 0.1419,
      "step": 92
    },
    {
      "epoch": 0.07769423558897243,
      "grad_norm": 0.32390695810317993,
      "learning_rate": 7.341522555726971e-07,
      "loss": 0.1727,
      "step": 93
    },
    {
      "epoch": 0.0785296574770259,
      "grad_norm": 0.17990685999393463,
      "learning_rate": 5.405570895622014e-07,
      "loss": 0.1278,
      "step": 94
    },
    {
      "epoch": 0.07936507936507936,
      "grad_norm": 0.5421126484870911,
      "learning_rate": 3.760813172726457e-07,
      "loss": 0.1988,
      "step": 95
    },
    {
      "epoch": 0.08020050125313283,
      "grad_norm": 0.22047364711761475,
      "learning_rate": 2.41056171020555e-07,
      "loss": 0.1636,
      "step": 96
    },
    {
      "epoch": 0.0810359231411863,
      "grad_norm": 0.2979952096939087,
      "learning_rate": 1.357535734809795e-07,
      "loss": 0.1766,
      "step": 97
    },
    {
      "epoch": 0.08187134502923976,
      "grad_norm": 0.4420830309391022,
      "learning_rate": 6.038559007141397e-08,
      "loss": 0.2073,
      "step": 98
    },
    {
      "epoch": 0.08270676691729323,
      "grad_norm": 0.8136470913887024,
      "learning_rate": 1.510400188028116e-08,
      "loss": 0.2351,
      "step": 99
    },
    {
      "epoch": 0.0835421888053467,
      "grad_norm": 0.37833961844444275,
      "learning_rate": 0.0,
      "loss": 0.1832,
      "step": 100
    },
    {
      "epoch": 0.0835421888053467,
      "eval_loss": 0.15805459022521973,
      "eval_runtime": 39.1836,
      "eval_samples_per_second": 12.863,
      "eval_steps_per_second": 3.216,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.3812411056128e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}