{ "best_metric": 0.11873143911361694, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.7312049433573635, "eval_steps": 25, "global_step": 142, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005149330587023687, "grad_norm": 1.9118549823760986, "learning_rate": 2e-05, "loss": 0.8413, "step": 1 }, { "epoch": 0.005149330587023687, "eval_loss": 0.8488772511482239, "eval_runtime": 0.4861, "eval_samples_per_second": 102.863, "eval_steps_per_second": 6.172, "step": 1 }, { "epoch": 0.010298661174047374, "grad_norm": 1.8143242597579956, "learning_rate": 4e-05, "loss": 0.7566, "step": 2 }, { "epoch": 0.015447991761071062, "grad_norm": 1.861998200416565, "learning_rate": 6e-05, "loss": 0.7849, "step": 3 }, { "epoch": 0.02059732234809475, "grad_norm": 1.755989670753479, "learning_rate": 8e-05, "loss": 0.7278, "step": 4 }, { "epoch": 0.025746652935118436, "grad_norm": 1.1633100509643555, "learning_rate": 0.0001, "loss": 0.651, "step": 5 }, { "epoch": 0.030895983522142123, "grad_norm": 2.0457730293273926, "learning_rate": 9.99881689824633e-05, "loss": 0.5963, "step": 6 }, { "epoch": 0.03604531410916581, "grad_norm": 1.874700665473938, "learning_rate": 9.995268215087426e-05, "loss": 0.5782, "step": 7 }, { "epoch": 0.0411946446961895, "grad_norm": 1.2453465461730957, "learning_rate": 9.989355816502525e-05, "loss": 0.4915, "step": 8 }, { "epoch": 0.046343975283213185, "grad_norm": 1.1409831047058105, "learning_rate": 9.981082811366797e-05, "loss": 0.4244, "step": 9 }, { "epoch": 0.05149330587023687, "grad_norm": 0.8969383835792542, "learning_rate": 9.970453549816632e-05, "loss": 0.3625, "step": 10 }, { "epoch": 0.05664263645726056, "grad_norm": 1.0627210140228271, "learning_rate": 9.957473620962246e-05, "loss": 0.3223, "step": 11 }, { "epoch": 0.061791967044284246, "grad_norm": 0.9921000599861145, "learning_rate": 9.94214984994879e-05, "loss": 0.2614, "step": 12 }, { "epoch": 0.06694129763130793, "grad_norm": 1.0099488496780396, "learning_rate": 9.924490294367533e-05, "loss": 0.393, "step": 13 }, { "epoch": 0.07209062821833162, "grad_norm": 0.9516873359680176, "learning_rate": 9.904504240019e-05, "loss": 0.3875, "step": 14 }, { "epoch": 0.07723995880535531, "grad_norm": 0.7377267479896545, "learning_rate": 9.88220219603028e-05, "loss": 0.3633, "step": 15 }, { "epoch": 0.082389289392379, "grad_norm": 0.6139414310455322, "learning_rate": 9.85759588932908e-05, "loss": 0.3318, "step": 16 }, { "epoch": 0.08753861997940268, "grad_norm": 0.5736819505691528, "learning_rate": 9.830698258477458e-05, "loss": 0.3028, "step": 17 }, { "epoch": 0.09268795056642637, "grad_norm": 0.5561637282371521, "learning_rate": 9.801523446868399e-05, "loss": 0.3008, "step": 18 }, { "epoch": 0.09783728115345006, "grad_norm": 0.5371466279029846, "learning_rate": 9.770086795288913e-05, "loss": 0.2838, "step": 19 }, { "epoch": 0.10298661174047374, "grad_norm": 0.5292208194732666, "learning_rate": 9.736404833853502e-05, "loss": 0.2504, "step": 20 }, { "epoch": 0.10813594232749743, "grad_norm": 0.43221810460090637, "learning_rate": 9.700495273312223e-05, "loss": 0.2371, "step": 21 }, { "epoch": 0.11328527291452112, "grad_norm": 0.4533340334892273, "learning_rate": 9.662376995737989e-05, "loss": 0.2223, "step": 22 }, { "epoch": 0.1184346035015448, "grad_norm": 0.43220922350883484, "learning_rate": 9.622070044597935e-05, "loss": 0.2191, "step": 23 }, { "epoch": 0.12358393408856849, "grad_norm": 0.46570438146591187, "learning_rate": 9.579595614214087e-05, "loss": 0.1742, "step": 24 }, { "epoch": 0.12873326467559218, "grad_norm": 1.1133004426956177, "learning_rate": 9.534976038618931e-05, "loss": 0.1495, "step": 25 }, { "epoch": 0.12873326467559218, "eval_loss": 0.25227421522140503, "eval_runtime": 0.4861, "eval_samples_per_second": 102.86, "eval_steps_per_second": 6.172, "step": 25 }, { "epoch": 0.13388259526261587, "grad_norm": 0.7691997289657593, "learning_rate": 9.488234779811635e-05, "loss": 0.2972, "step": 26 }, { "epoch": 0.13903192584963955, "grad_norm": 1.2182092666625977, "learning_rate": 9.439396415421204e-05, "loss": 0.2965, "step": 27 }, { "epoch": 0.14418125643666324, "grad_norm": 0.8559730648994446, "learning_rate": 9.388486625782995e-05, "loss": 0.2557, "step": 28 }, { "epoch": 0.14933058702368693, "grad_norm": 0.47647181153297424, "learning_rate": 9.335532180435412e-05, "loss": 0.2372, "step": 29 }, { "epoch": 0.15447991761071062, "grad_norm": 0.3898983895778656, "learning_rate": 9.280560924043858e-05, "loss": 0.2253, "step": 30 }, { "epoch": 0.1596292481977343, "grad_norm": 0.42367926239967346, "learning_rate": 9.223601761759367e-05, "loss": 0.2083, "step": 31 }, { "epoch": 0.164778578784758, "grad_norm": 0.4791017472743988, "learning_rate": 9.164684644019624e-05, "loss": 0.2071, "step": 32 }, { "epoch": 0.16992790937178168, "grad_norm": 0.5360159277915955, "learning_rate": 9.103840550800329e-05, "loss": 0.2034, "step": 33 }, { "epoch": 0.17507723995880536, "grad_norm": 0.45532089471817017, "learning_rate": 9.041101475325209e-05, "loss": 0.1807, "step": 34 }, { "epoch": 0.18022657054582905, "grad_norm": 0.38769960403442383, "learning_rate": 8.976500407243247e-05, "loss": 0.1728, "step": 35 }, { "epoch": 0.18537590113285274, "grad_norm": 0.3485439419746399, "learning_rate": 8.910071315281975e-05, "loss": 0.1418, "step": 36 }, { "epoch": 0.19052523171987643, "grad_norm": 0.43333378434181213, "learning_rate": 8.841849129385921e-05, "loss": 0.1204, "step": 37 }, { "epoch": 0.1956745623069001, "grad_norm": 0.5296416878700256, "learning_rate": 8.771869722349651e-05, "loss": 0.24, "step": 38 }, { "epoch": 0.2008238928939238, "grad_norm": 0.3325974643230438, "learning_rate": 8.700169890955027e-05, "loss": 0.2236, "step": 39 }, { "epoch": 0.2059732234809475, "grad_norm": 0.42638543248176575, "learning_rate": 8.626787336622607e-05, "loss": 0.2151, "step": 40 }, { "epoch": 0.21112255406797117, "grad_norm": 0.4510488212108612, "learning_rate": 8.55176064558738e-05, "loss": 0.1921, "step": 41 }, { "epoch": 0.21627188465499486, "grad_norm": 0.422617107629776, "learning_rate": 8.475129268609227e-05, "loss": 0.1846, "step": 42 }, { "epoch": 0.22142121524201855, "grad_norm": 0.44664841890335083, "learning_rate": 8.396933500228808e-05, "loss": 0.1709, "step": 43 }, { "epoch": 0.22657054582904224, "grad_norm": 0.4472566246986389, "learning_rate": 8.317214457579773e-05, "loss": 0.1765, "step": 44 }, { "epoch": 0.23171987641606592, "grad_norm": 0.3076079487800598, "learning_rate": 8.23601405876841e-05, "loss": 0.1503, "step": 45 }, { "epoch": 0.2368692070030896, "grad_norm": 0.3404317796230316, "learning_rate": 8.153375000832157e-05, "loss": 0.1423, "step": 46 }, { "epoch": 0.2420185375901133, "grad_norm": 0.3234133720397949, "learning_rate": 8.069340737288512e-05, "loss": 0.1369, "step": 47 }, { "epoch": 0.24716786817713698, "grad_norm": 0.38141271471977234, "learning_rate": 7.98395545528617e-05, "loss": 0.1283, "step": 48 }, { "epoch": 0.25231719876416064, "grad_norm": 0.559747576713562, "learning_rate": 7.897264052370409e-05, "loss": 0.1243, "step": 49 }, { "epoch": 0.25746652935118436, "grad_norm": 0.7120130062103271, "learning_rate": 7.809312112874924e-05, "loss": 0.0913, "step": 50 }, { "epoch": 0.25746652935118436, "eval_loss": 0.15993443131446838, "eval_runtime": 0.4874, "eval_samples_per_second": 102.589, "eval_steps_per_second": 6.155, "step": 50 }, { "epoch": 0.262615859938208, "grad_norm": 0.6971345543861389, "learning_rate": 7.720145883952544e-05, "loss": 0.3077, "step": 51 }, { "epoch": 0.26776519052523173, "grad_norm": 0.4453997313976288, "learning_rate": 7.629812251257401e-05, "loss": 0.1954, "step": 52 }, { "epoch": 0.2729145211122554, "grad_norm": 0.3085014224052429, "learning_rate": 7.53835871429139e-05, "loss": 0.1709, "step": 53 }, { "epoch": 0.2780638516992791, "grad_norm": 0.34796738624572754, "learning_rate": 7.445833361427828e-05, "loss": 0.1641, "step": 54 }, { "epoch": 0.28321318228630277, "grad_norm": 0.4266407787799835, "learning_rate": 7.352284844625481e-05, "loss": 0.1585, "step": 55 }, { "epoch": 0.2883625128733265, "grad_norm": 0.5182251930236816, "learning_rate": 7.257762353846257e-05, "loss": 0.1609, "step": 56 }, { "epoch": 0.29351184346035014, "grad_norm": 0.4957202672958374, "learning_rate": 7.162315591189978e-05, "loss": 0.1558, "step": 57 }, { "epoch": 0.29866117404737386, "grad_norm": 0.4565785527229309, "learning_rate": 7.065994744759879e-05, "loss": 0.1437, "step": 58 }, { "epoch": 0.3038105046343975, "grad_norm": 0.36737626791000366, "learning_rate": 6.96885046227255e-05, "loss": 0.1326, "step": 59 }, { "epoch": 0.30895983522142123, "grad_norm": 0.2986319363117218, "learning_rate": 6.8709338244262e-05, "loss": 0.1241, "step": 60 }, { "epoch": 0.3141091658084449, "grad_norm": 0.31537097692489624, "learning_rate": 6.772296318041253e-05, "loss": 0.119, "step": 61 }, { "epoch": 0.3192584963954686, "grad_norm": 0.3971193730831146, "learning_rate": 6.672989808987385e-05, "loss": 0.102, "step": 62 }, { "epoch": 0.32440782698249226, "grad_norm": 0.4525837004184723, "learning_rate": 6.573066514911273e-05, "loss": 0.2455, "step": 63 }, { "epoch": 0.329557157569516, "grad_norm": 0.31870609521865845, "learning_rate": 6.472578977779339e-05, "loss": 0.1692, "step": 64 }, { "epoch": 0.33470648815653964, "grad_norm": 0.43375498056411743, "learning_rate": 6.371580036249985e-05, "loss": 0.1665, "step": 65 }, { "epoch": 0.33985581874356335, "grad_norm": 0.5092880129814148, "learning_rate": 6.270122797889806e-05, "loss": 0.1618, "step": 66 }, { "epoch": 0.345005149330587, "grad_norm": 0.415477454662323, "learning_rate": 6.168260611248417e-05, "loss": 0.1537, "step": 67 }, { "epoch": 0.35015447991761073, "grad_norm": 0.37228500843048096, "learning_rate": 6.066047037806549e-05, "loss": 0.1468, "step": 68 }, { "epoch": 0.3553038105046344, "grad_norm": 0.35446012020111084, "learning_rate": 5.9635358238121954e-05, "loss": 0.1484, "step": 69 }, { "epoch": 0.3604531410916581, "grad_norm": 0.2916114032268524, "learning_rate": 5.860780872019601e-05, "loss": 0.1268, "step": 70 }, { "epoch": 0.36560247167868176, "grad_norm": 0.3278571367263794, "learning_rate": 5.7578362133459494e-05, "loss": 0.1241, "step": 71 }, { "epoch": 0.3707518022657055, "grad_norm": 0.30740585923194885, "learning_rate": 5.6547559784606675e-05, "loss": 0.1152, "step": 72 }, { "epoch": 0.37590113285272914, "grad_norm": 0.27809590101242065, "learning_rate": 5.551594369322271e-05, "loss": 0.1017, "step": 73 }, { "epoch": 0.38105046343975285, "grad_norm": 0.36339494585990906, "learning_rate": 5.44840563067773e-05, "loss": 0.0868, "step": 74 }, { "epoch": 0.3861997940267765, "grad_norm": 0.3924078643321991, "learning_rate": 5.3452440215393315e-05, "loss": 0.0638, "step": 75 }, { "epoch": 0.3861997940267765, "eval_loss": 0.13260947167873383, "eval_runtime": 0.4867, "eval_samples_per_second": 102.736, "eval_steps_per_second": 6.164, "step": 75 }, { "epoch": 0.3913491246138002, "grad_norm": 0.6179808378219604, "learning_rate": 5.242163786654051e-05, "loss": 0.2107, "step": 76 }, { "epoch": 0.3964984552008239, "grad_norm": 0.43608739972114563, "learning_rate": 5.139219127980399e-05, "loss": 0.1587, "step": 77 }, { "epoch": 0.4016477857878476, "grad_norm": 0.3661559224128723, "learning_rate": 5.036464176187806e-05, "loss": 0.1553, "step": 78 }, { "epoch": 0.40679711637487126, "grad_norm": 0.41823190450668335, "learning_rate": 4.933952962193452e-05, "loss": 0.1505, "step": 79 }, { "epoch": 0.411946446961895, "grad_norm": 0.41759902238845825, "learning_rate": 4.831739388751584e-05, "loss": 0.1402, "step": 80 }, { "epoch": 0.41709577754891863, "grad_norm": 0.37053796648979187, "learning_rate": 4.729877202110195e-05, "loss": 0.1305, "step": 81 }, { "epoch": 0.42224510813594235, "grad_norm": 0.3314639627933502, "learning_rate": 4.628419963750016e-05, "loss": 0.1211, "step": 82 }, { "epoch": 0.427394438722966, "grad_norm": 0.2696942389011383, "learning_rate": 4.527421022220663e-05, "loss": 0.1101, "step": 83 }, { "epoch": 0.4325437693099897, "grad_norm": 0.32313790917396545, "learning_rate": 4.426933485088729e-05, "loss": 0.1098, "step": 84 }, { "epoch": 0.4376930998970134, "grad_norm": 0.289420485496521, "learning_rate": 4.327010191012617e-05, "loss": 0.1033, "step": 85 }, { "epoch": 0.4428424304840371, "grad_norm": 0.25251471996307373, "learning_rate": 4.227703681958749e-05, "loss": 0.0839, "step": 86 }, { "epoch": 0.44799176107106076, "grad_norm": 0.2926040291786194, "learning_rate": 4.1290661755738e-05, "loss": 0.0633, "step": 87 }, { "epoch": 0.45314109165808447, "grad_norm": 0.42090505361557007, "learning_rate": 4.03114953772745e-05, "loss": 0.1778, "step": 88 }, { "epoch": 0.45829042224510813, "grad_norm": 0.3921765983104706, "learning_rate": 3.934005255240122e-05, "loss": 0.1554, "step": 89 }, { "epoch": 0.46343975283213185, "grad_norm": 0.22954587638378143, "learning_rate": 3.837684408810023e-05, "loss": 0.1447, "step": 90 }, { "epoch": 0.4685890834191555, "grad_norm": 0.32881876826286316, "learning_rate": 3.7422376461537435e-05, "loss": 0.132, "step": 91 }, { "epoch": 0.4737384140061792, "grad_norm": 0.3419657051563263, "learning_rate": 3.647715155374519e-05, "loss": 0.1329, "step": 92 }, { "epoch": 0.4788877445932029, "grad_norm": 0.30170801281929016, "learning_rate": 3.554166638572175e-05, "loss": 0.1174, "step": 93 }, { "epoch": 0.4840370751802266, "grad_norm": 0.3186221718788147, "learning_rate": 3.461641285708611e-05, "loss": 0.1174, "step": 94 }, { "epoch": 0.48918640576725025, "grad_norm": 0.2956952452659607, "learning_rate": 3.370187748742601e-05, "loss": 0.1085, "step": 95 }, { "epoch": 0.49433573635427397, "grad_norm": 0.25691288709640503, "learning_rate": 3.279854116047457e-05, "loss": 0.096, "step": 96 }, { "epoch": 0.49948506694129763, "grad_norm": 0.25471076369285583, "learning_rate": 3.190687887125077e-05, "loss": 0.0961, "step": 97 }, { "epoch": 0.5046343975283213, "grad_norm": 0.24956409633159637, "learning_rate": 3.102735947629594e-05, "loss": 0.0912, "step": 98 }, { "epoch": 0.509783728115345, "grad_norm": 0.2942916452884674, "learning_rate": 3.0160445447138308e-05, "loss": 0.0773, "step": 99 }, { "epoch": 0.5149330587023687, "grad_norm": 0.42962557077407837, "learning_rate": 2.9306592627114883e-05, "loss": 0.0568, "step": 100 }, { "epoch": 0.5149330587023687, "eval_loss": 0.11873143911361694, "eval_runtime": 0.4851, "eval_samples_per_second": 103.066, "eval_steps_per_second": 6.184, "step": 100 }, { "epoch": 0.5200823892893924, "grad_norm": 0.32270699739456177, "learning_rate": 2.846624999167843e-05, "loss": 0.1637, "step": 101 }, { "epoch": 0.525231719876416, "grad_norm": 0.3115319609642029, "learning_rate": 2.7639859412315917e-05, "loss": 0.1418, "step": 102 }, { "epoch": 0.5303810504634398, "grad_norm": 0.26950860023498535, "learning_rate": 2.682785542420229e-05, "loss": 0.1285, "step": 103 }, { "epoch": 0.5355303810504635, "grad_norm": 0.24307739734649658, "learning_rate": 2.603066499771192e-05, "loss": 0.1333, "step": 104 }, { "epoch": 0.5406797116374872, "grad_norm": 0.2244795262813568, "learning_rate": 2.5248707313907747e-05, "loss": 0.1211, "step": 105 }, { "epoch": 0.5458290422245108, "grad_norm": 0.2730483114719391, "learning_rate": 2.4482393544126215e-05, "loss": 0.1246, "step": 106 }, { "epoch": 0.5509783728115345, "grad_norm": 0.24573828279972076, "learning_rate": 2.3732126633773928e-05, "loss": 0.1104, "step": 107 }, { "epoch": 0.5561277033985582, "grad_norm": 0.24852481484413147, "learning_rate": 2.2998301090449738e-05, "loss": 0.0935, "step": 108 }, { "epoch": 0.5612770339855818, "grad_norm": 0.2701282799243927, "learning_rate": 2.2281302776503497e-05, "loss": 0.1032, "step": 109 }, { "epoch": 0.5664263645726055, "grad_norm": 0.33753320574760437, "learning_rate": 2.1581508706140802e-05, "loss": 0.1071, "step": 110 }, { "epoch": 0.5715756951596292, "grad_norm": 0.36751407384872437, "learning_rate": 2.0899286847180243e-05, "loss": 0.0919, "step": 111 }, { "epoch": 0.576725025746653, "grad_norm": 0.3343943953514099, "learning_rate": 2.0234995927567523e-05, "loss": 0.0607, "step": 112 }, { "epoch": 0.5818743563336766, "grad_norm": 0.30756524205207825, "learning_rate": 1.9588985246747925e-05, "loss": 0.173, "step": 113 }, { "epoch": 0.5870236869207003, "grad_norm": 0.251208633184433, "learning_rate": 1.896159449199672e-05, "loss": 0.1496, "step": 114 }, { "epoch": 0.592173017507724, "grad_norm": 0.22547647356987, "learning_rate": 1.835315355980376e-05, "loss": 0.1253, "step": 115 }, { "epoch": 0.5973223480947477, "grad_norm": 0.2488126903772354, "learning_rate": 1.7763982382406352e-05, "loss": 0.1204, "step": 116 }, { "epoch": 0.6024716786817713, "grad_norm": 0.2688787579536438, "learning_rate": 1.7194390759561453e-05, "loss": 0.1151, "step": 117 }, { "epoch": 0.607621009268795, "grad_norm": 0.24907900393009186, "learning_rate": 1.664467819564588e-05, "loss": 0.1098, "step": 118 }, { "epoch": 0.6127703398558187, "grad_norm": 0.25353243947029114, "learning_rate": 1.6115133742170053e-05, "loss": 0.1112, "step": 119 }, { "epoch": 0.6179196704428425, "grad_norm": 0.21995492279529572, "learning_rate": 1.5606035845787987e-05, "loss": 0.0976, "step": 120 }, { "epoch": 0.6230690010298661, "grad_norm": 0.22032539546489716, "learning_rate": 1.511765220188367e-05, "loss": 0.087, "step": 121 }, { "epoch": 0.6282183316168898, "grad_norm": 0.2590661942958832, "learning_rate": 1.4650239613810693e-05, "loss": 0.088, "step": 122 }, { "epoch": 0.6333676622039135, "grad_norm": 0.2637864351272583, "learning_rate": 1.4204043857859129e-05, "loss": 0.0884, "step": 123 }, { "epoch": 0.6385169927909372, "grad_norm": 0.2572005093097687, "learning_rate": 1.3779299554020672e-05, "loss": 0.0735, "step": 124 }, { "epoch": 0.6436663233779608, "grad_norm": 0.26129183173179626, "learning_rate": 1.3376230042620109e-05, "loss": 0.0471, "step": 125 }, { "epoch": 0.6436663233779608, "eval_loss": 0.10684148967266083, "eval_runtime": 0.4867, "eval_samples_per_second": 102.737, "eval_steps_per_second": 6.164, "step": 125 }, { "epoch": 0.6488156539649845, "grad_norm": 0.26877304911613464, "learning_rate": 1.2995047266877775e-05, "loss": 0.1827, "step": 126 }, { "epoch": 0.6539649845520082, "grad_norm": 0.2039293497800827, "learning_rate": 1.2635951661464995e-05, "loss": 0.1372, "step": 127 }, { "epoch": 0.659114315139032, "grad_norm": 0.20893456041812897, "learning_rate": 1.2299132047110876e-05, "loss": 0.1187, "step": 128 }, { "epoch": 0.6642636457260556, "grad_norm": 0.2243366241455078, "learning_rate": 1.1984765531316038e-05, "loss": 0.118, "step": 129 }, { "epoch": 0.6694129763130793, "grad_norm": 0.2364204078912735, "learning_rate": 1.1693017415225432e-05, "loss": 0.1196, "step": 130 }, { "epoch": 0.674562306900103, "grad_norm": 0.2659465968608856, "learning_rate": 1.1424041106709194e-05, "loss": 0.1104, "step": 131 }, { "epoch": 0.6797116374871267, "grad_norm": 0.22487813234329224, "learning_rate": 1.1177978039697217e-05, "loss": 0.1002, "step": 132 }, { "epoch": 0.6848609680741503, "grad_norm": 0.22705155611038208, "learning_rate": 1.0954957599810003e-05, "loss": 0.1075, "step": 133 }, { "epoch": 0.690010298661174, "grad_norm": 0.2651651203632355, "learning_rate": 1.0755097056324672e-05, "loss": 0.1095, "step": 134 }, { "epoch": 0.6951596292481977, "grad_norm": 0.25540125370025635, "learning_rate": 1.0578501500512109e-05, "loss": 0.0937, "step": 135 }, { "epoch": 0.7003089598352215, "grad_norm": 0.2261316478252411, "learning_rate": 1.042526379037754e-05, "loss": 0.0741, "step": 136 }, { "epoch": 0.7054582904222451, "grad_norm": 0.21159891784191132, "learning_rate": 1.0295464501833682e-05, "loss": 0.0481, "step": 137 }, { "epoch": 0.7106076210092688, "grad_norm": 0.26530131697654724, "learning_rate": 1.0189171886332038e-05, "loss": 0.1346, "step": 138 }, { "epoch": 0.7157569515962925, "grad_norm": 0.3140278458595276, "learning_rate": 1.0106441834974748e-05, "loss": 0.1296, "step": 139 }, { "epoch": 0.7209062821833162, "grad_norm": 0.24784407019615173, "learning_rate": 1.0047317849125743e-05, "loss": 0.1294, "step": 140 }, { "epoch": 0.7260556127703398, "grad_norm": 0.19761891663074493, "learning_rate": 1.0011831017536722e-05, "loss": 0.1212, "step": 141 }, { "epoch": 0.7312049433573635, "grad_norm": 0.2117881029844284, "learning_rate": 1e-05, "loss": 0.1272, "step": 142 } ], "logging_steps": 1, "max_steps": 142, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.22678433233109e+17, "train_batch_size": 6, "trial_name": null, "trial_params": null }