{ "best_metric": 2.045283317565918, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.0198373338623289, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.918666931164451e-05, "grad_norm": 0.31591522693634033, "learning_rate": 1.0060000000000002e-05, "loss": 1.7954, "step": 1 }, { "epoch": 9.918666931164451e-05, "eval_loss": 2.1422743797302246, "eval_runtime": 481.1147, "eval_samples_per_second": 8.823, "eval_steps_per_second": 2.207, "step": 1 }, { "epoch": 0.00019837333862328903, "grad_norm": 0.3503745496273041, "learning_rate": 2.0120000000000004e-05, "loss": 1.9533, "step": 2 }, { "epoch": 0.00029756000793493357, "grad_norm": 0.36861494183540344, "learning_rate": 3.018e-05, "loss": 2.0738, "step": 3 }, { "epoch": 0.00039674667724657806, "grad_norm": 0.3805672228336334, "learning_rate": 4.024000000000001e-05, "loss": 2.2122, "step": 4 }, { "epoch": 0.0004959333465582226, "grad_norm": 0.4056474268436432, "learning_rate": 5.03e-05, "loss": 2.4175, "step": 5 }, { "epoch": 0.0005951200158698671, "grad_norm": 0.3867761492729187, "learning_rate": 6.036e-05, "loss": 2.0798, "step": 6 }, { "epoch": 0.0006943066851815116, "grad_norm": 0.44038522243499756, "learning_rate": 7.042e-05, "loss": 2.204, "step": 7 }, { "epoch": 0.0007934933544931561, "grad_norm": 0.4558636248111725, "learning_rate": 8.048000000000002e-05, "loss": 1.9656, "step": 8 }, { "epoch": 0.0008926800238048007, "grad_norm": 0.4567602872848511, "learning_rate": 9.054000000000001e-05, "loss": 1.836, "step": 9 }, { "epoch": 0.0009918666931164452, "grad_norm": 0.4776064455509186, "learning_rate": 0.0001006, "loss": 1.9358, "step": 10 }, { "epoch": 0.0010910533624280897, "grad_norm": 0.4712127447128296, "learning_rate": 0.00010007052631578948, "loss": 2.0418, "step": 11 }, { "epoch": 0.0011902400317397343, "grad_norm": 0.4415411055088043, "learning_rate": 9.954105263157895e-05, "loss": 1.8885, "step": 12 }, { "epoch": 0.0012894267010513786, "grad_norm": 0.4946366250514984, "learning_rate": 9.901157894736842e-05, "loss": 2.1354, "step": 13 }, { "epoch": 0.0013886133703630231, "grad_norm": 0.443600058555603, "learning_rate": 9.84821052631579e-05, "loss": 1.6907, "step": 14 }, { "epoch": 0.0014878000396746677, "grad_norm": 0.5230553150177002, "learning_rate": 9.795263157894737e-05, "loss": 2.3476, "step": 15 }, { "epoch": 0.0015869867089863122, "grad_norm": 0.5645577907562256, "learning_rate": 9.742315789473686e-05, "loss": 2.0178, "step": 16 }, { "epoch": 0.0016861733782979568, "grad_norm": 0.49777883291244507, "learning_rate": 9.689368421052633e-05, "loss": 1.9202, "step": 17 }, { "epoch": 0.0017853600476096013, "grad_norm": 0.5200148820877075, "learning_rate": 9.63642105263158e-05, "loss": 2.1958, "step": 18 }, { "epoch": 0.0018845467169212459, "grad_norm": 0.5237608551979065, "learning_rate": 9.583473684210527e-05, "loss": 2.1429, "step": 19 }, { "epoch": 0.0019837333862328904, "grad_norm": 0.5042146444320679, "learning_rate": 9.530526315789474e-05, "loss": 1.9252, "step": 20 }, { "epoch": 0.002082920055544535, "grad_norm": 0.5257642269134521, "learning_rate": 9.477578947368422e-05, "loss": 2.042, "step": 21 }, { "epoch": 0.0021821067248561795, "grad_norm": 0.5433064699172974, "learning_rate": 9.424631578947369e-05, "loss": 2.0178, "step": 22 }, { "epoch": 0.002281293394167824, "grad_norm": 0.6098982691764832, "learning_rate": 9.371684210526316e-05, "loss": 2.043, "step": 23 }, { "epoch": 0.0023804800634794686, "grad_norm": 0.5892771482467651, "learning_rate": 9.318736842105263e-05, "loss": 2.044, "step": 24 }, { "epoch": 0.0024796667327911127, "grad_norm": 0.5854699611663818, "learning_rate": 9.26578947368421e-05, "loss": 2.4106, "step": 25 }, { "epoch": 0.002578853402102757, "grad_norm": 0.5785255432128906, "learning_rate": 9.212842105263159e-05, "loss": 1.9775, "step": 26 }, { "epoch": 0.0026780400714144018, "grad_norm": 0.5783630609512329, "learning_rate": 9.159894736842107e-05, "loss": 2.2172, "step": 27 }, { "epoch": 0.0027772267407260463, "grad_norm": 0.5924263596534729, "learning_rate": 9.106947368421054e-05, "loss": 2.3351, "step": 28 }, { "epoch": 0.002876413410037691, "grad_norm": 0.5702072381973267, "learning_rate": 9.054000000000001e-05, "loss": 2.1998, "step": 29 }, { "epoch": 0.0029756000793493354, "grad_norm": 0.5693298578262329, "learning_rate": 9.001052631578948e-05, "loss": 1.9428, "step": 30 }, { "epoch": 0.00307478674866098, "grad_norm": 0.553013265132904, "learning_rate": 8.948105263157895e-05, "loss": 1.9306, "step": 31 }, { "epoch": 0.0031739734179726245, "grad_norm": 0.6651368141174316, "learning_rate": 8.895157894736842e-05, "loss": 2.0397, "step": 32 }, { "epoch": 0.003273160087284269, "grad_norm": 0.6553846001625061, "learning_rate": 8.842210526315789e-05, "loss": 2.1973, "step": 33 }, { "epoch": 0.0033723467565959135, "grad_norm": 0.6568091511726379, "learning_rate": 8.789263157894738e-05, "loss": 1.9502, "step": 34 }, { "epoch": 0.003471533425907558, "grad_norm": 0.7385371327400208, "learning_rate": 8.736315789473685e-05, "loss": 2.2147, "step": 35 }, { "epoch": 0.0035707200952192026, "grad_norm": 0.6190497875213623, "learning_rate": 8.683368421052632e-05, "loss": 2.3162, "step": 36 }, { "epoch": 0.003669906764530847, "grad_norm": 0.6054490804672241, "learning_rate": 8.63042105263158e-05, "loss": 1.9026, "step": 37 }, { "epoch": 0.0037690934338424917, "grad_norm": 0.6607844233512878, "learning_rate": 8.577473684210527e-05, "loss": 2.1208, "step": 38 }, { "epoch": 0.0038682801031541363, "grad_norm": 0.7357839941978455, "learning_rate": 8.524526315789474e-05, "loss": 2.2168, "step": 39 }, { "epoch": 0.003967466772465781, "grad_norm": 0.7413284182548523, "learning_rate": 8.471578947368421e-05, "loss": 2.0931, "step": 40 }, { "epoch": 0.004066653441777425, "grad_norm": 0.7085254788398743, "learning_rate": 8.41863157894737e-05, "loss": 2.1672, "step": 41 }, { "epoch": 0.00416584011108907, "grad_norm": 0.7019400596618652, "learning_rate": 8.365684210526317e-05, "loss": 2.0547, "step": 42 }, { "epoch": 0.004265026780400714, "grad_norm": 0.7577438950538635, "learning_rate": 8.312736842105264e-05, "loss": 2.0381, "step": 43 }, { "epoch": 0.004364213449712359, "grad_norm": 0.7274919748306274, "learning_rate": 8.259789473684211e-05, "loss": 2.0812, "step": 44 }, { "epoch": 0.0044634001190240035, "grad_norm": 0.8398061394691467, "learning_rate": 8.206842105263158e-05, "loss": 2.0074, "step": 45 }, { "epoch": 0.004562586788335648, "grad_norm": 0.9707539677619934, "learning_rate": 8.153894736842105e-05, "loss": 2.2341, "step": 46 }, { "epoch": 0.004661773457647293, "grad_norm": 0.8283779621124268, "learning_rate": 8.100947368421053e-05, "loss": 1.8799, "step": 47 }, { "epoch": 0.004760960126958937, "grad_norm": 1.0051816701889038, "learning_rate": 8.048000000000002e-05, "loss": 1.9985, "step": 48 }, { "epoch": 0.004860146796270581, "grad_norm": 2.010098457336426, "learning_rate": 7.995052631578949e-05, "loss": 1.7956, "step": 49 }, { "epoch": 0.004959333465582225, "grad_norm": 2.6211178302764893, "learning_rate": 7.942105263157896e-05, "loss": 1.7728, "step": 50 }, { "epoch": 0.004959333465582225, "eval_loss": 2.1202521324157715, "eval_runtime": 452.6675, "eval_samples_per_second": 9.378, "eval_steps_per_second": 2.346, "step": 50 }, { "epoch": 0.00505852013489387, "grad_norm": 0.6983906626701355, "learning_rate": 7.889157894736843e-05, "loss": 1.6959, "step": 51 }, { "epoch": 0.005157706804205514, "grad_norm": 0.7728566527366638, "learning_rate": 7.83621052631579e-05, "loss": 2.0998, "step": 52 }, { "epoch": 0.005256893473517159, "grad_norm": 0.697317898273468, "learning_rate": 7.783263157894737e-05, "loss": 2.0257, "step": 53 }, { "epoch": 0.0053560801428288035, "grad_norm": 0.5695851445198059, "learning_rate": 7.730315789473684e-05, "loss": 2.0167, "step": 54 }, { "epoch": 0.005455266812140448, "grad_norm": 0.469310462474823, "learning_rate": 7.677368421052632e-05, "loss": 2.0559, "step": 55 }, { "epoch": 0.005554453481452093, "grad_norm": 0.4206158518791199, "learning_rate": 7.624421052631579e-05, "loss": 2.0969, "step": 56 }, { "epoch": 0.005653640150763737, "grad_norm": 0.4133077561855316, "learning_rate": 7.571473684210526e-05, "loss": 2.2379, "step": 57 }, { "epoch": 0.005752826820075382, "grad_norm": 0.4213486611843109, "learning_rate": 7.518526315789475e-05, "loss": 2.1441, "step": 58 }, { "epoch": 0.005852013489387026, "grad_norm": 0.42303550243377686, "learning_rate": 7.465578947368422e-05, "loss": 1.9846, "step": 59 }, { "epoch": 0.005951200158698671, "grad_norm": 0.45184460282325745, "learning_rate": 7.412631578947369e-05, "loss": 2.0865, "step": 60 }, { "epoch": 0.006050386828010315, "grad_norm": 0.45460712909698486, "learning_rate": 7.359684210526317e-05, "loss": 2.235, "step": 61 }, { "epoch": 0.00614957349732196, "grad_norm": 0.4863188862800598, "learning_rate": 7.306736842105264e-05, "loss": 2.1843, "step": 62 }, { "epoch": 0.006248760166633604, "grad_norm": 0.4348534643650055, "learning_rate": 7.253789473684211e-05, "loss": 1.8581, "step": 63 }, { "epoch": 0.006347946835945249, "grad_norm": 0.43499282002449036, "learning_rate": 7.200842105263158e-05, "loss": 2.1743, "step": 64 }, { "epoch": 0.0064471335052568935, "grad_norm": 0.443194180727005, "learning_rate": 7.147894736842105e-05, "loss": 1.9319, "step": 65 }, { "epoch": 0.006546320174568538, "grad_norm": 0.4738743305206299, "learning_rate": 7.094947368421052e-05, "loss": 1.8147, "step": 66 }, { "epoch": 0.0066455068438801826, "grad_norm": 0.484662264585495, "learning_rate": 7.042e-05, "loss": 1.8934, "step": 67 }, { "epoch": 0.006744693513191827, "grad_norm": 0.4770859181880951, "learning_rate": 6.989052631578948e-05, "loss": 1.9145, "step": 68 }, { "epoch": 0.006843880182503472, "grad_norm": 0.5535433888435364, "learning_rate": 6.936105263157896e-05, "loss": 2.0795, "step": 69 }, { "epoch": 0.006943066851815116, "grad_norm": 0.48283520340919495, "learning_rate": 6.883157894736843e-05, "loss": 1.9492, "step": 70 }, { "epoch": 0.007042253521126761, "grad_norm": 0.4779215455055237, "learning_rate": 6.83021052631579e-05, "loss": 1.9317, "step": 71 }, { "epoch": 0.007141440190438405, "grad_norm": 0.4880412518978119, "learning_rate": 6.777263157894737e-05, "loss": 2.0118, "step": 72 }, { "epoch": 0.00724062685975005, "grad_norm": 0.49712997674942017, "learning_rate": 6.724315789473684e-05, "loss": 2.1046, "step": 73 }, { "epoch": 0.007339813529061694, "grad_norm": 0.519180417060852, "learning_rate": 6.671368421052631e-05, "loss": 2.0649, "step": 74 }, { "epoch": 0.007439000198373339, "grad_norm": 0.5260021686553955, "learning_rate": 6.61842105263158e-05, "loss": 1.8441, "step": 75 }, { "epoch": 0.007538186867684983, "grad_norm": 0.5280306339263916, "learning_rate": 6.565473684210527e-05, "loss": 2.2397, "step": 76 }, { "epoch": 0.007637373536996628, "grad_norm": 0.5007418394088745, "learning_rate": 6.512526315789474e-05, "loss": 2.1125, "step": 77 }, { "epoch": 0.0077365602063082725, "grad_norm": 0.5479311347007751, "learning_rate": 6.459578947368421e-05, "loss": 1.9533, "step": 78 }, { "epoch": 0.007835746875619916, "grad_norm": 0.5591127276420593, "learning_rate": 6.406631578947369e-05, "loss": 1.9261, "step": 79 }, { "epoch": 0.007934933544931562, "grad_norm": 0.5724272131919861, "learning_rate": 6.353684210526316e-05, "loss": 2.1247, "step": 80 }, { "epoch": 0.008034120214243205, "grad_norm": 0.5812843441963196, "learning_rate": 6.300736842105263e-05, "loss": 2.0034, "step": 81 }, { "epoch": 0.00813330688355485, "grad_norm": 0.5678356885910034, "learning_rate": 6.247789473684212e-05, "loss": 2.2249, "step": 82 }, { "epoch": 0.008232493552866494, "grad_norm": 0.5913679003715515, "learning_rate": 6.194842105263159e-05, "loss": 2.1256, "step": 83 }, { "epoch": 0.00833168022217814, "grad_norm": 0.5659611225128174, "learning_rate": 6.141894736842106e-05, "loss": 2.205, "step": 84 }, { "epoch": 0.008430866891489783, "grad_norm": 0.5554997324943542, "learning_rate": 6.088947368421053e-05, "loss": 1.8814, "step": 85 }, { "epoch": 0.008530053560801429, "grad_norm": 0.6280261874198914, "learning_rate": 6.036e-05, "loss": 2.2815, "step": 86 }, { "epoch": 0.008629240230113073, "grad_norm": 0.6290625333786011, "learning_rate": 5.9830526315789475e-05, "loss": 1.8478, "step": 87 }, { "epoch": 0.008728426899424718, "grad_norm": 0.6303794384002686, "learning_rate": 5.9301052631578946e-05, "loss": 1.8825, "step": 88 }, { "epoch": 0.008827613568736362, "grad_norm": 0.6564882397651672, "learning_rate": 5.877157894736843e-05, "loss": 2.1155, "step": 89 }, { "epoch": 0.008926800238048007, "grad_norm": 0.6798752546310425, "learning_rate": 5.82421052631579e-05, "loss": 2.0635, "step": 90 }, { "epoch": 0.00902598690735965, "grad_norm": 0.6318185925483704, "learning_rate": 5.771263157894737e-05, "loss": 2.0311, "step": 91 }, { "epoch": 0.009125173576671296, "grad_norm": 0.7091951966285706, "learning_rate": 5.718315789473685e-05, "loss": 2.2337, "step": 92 }, { "epoch": 0.00922436024598294, "grad_norm": 0.6701398491859436, "learning_rate": 5.665368421052632e-05, "loss": 1.9142, "step": 93 }, { "epoch": 0.009323546915294585, "grad_norm": 0.7093063592910767, "learning_rate": 5.612421052631579e-05, "loss": 1.9709, "step": 94 }, { "epoch": 0.009422733584606229, "grad_norm": 0.807305634021759, "learning_rate": 5.559473684210527e-05, "loss": 2.2168, "step": 95 }, { "epoch": 0.009521920253917874, "grad_norm": 0.7795900702476501, "learning_rate": 5.506526315789474e-05, "loss": 1.6774, "step": 96 }, { "epoch": 0.009621106923229518, "grad_norm": 0.804345965385437, "learning_rate": 5.453578947368421e-05, "loss": 1.9061, "step": 97 }, { "epoch": 0.009720293592541162, "grad_norm": 0.8429539203643799, "learning_rate": 5.400631578947369e-05, "loss": 2.0082, "step": 98 }, { "epoch": 0.009819480261852807, "grad_norm": 1.1079602241516113, "learning_rate": 5.347684210526316e-05, "loss": 1.626, "step": 99 }, { "epoch": 0.00991866693116445, "grad_norm": 1.3489030599594116, "learning_rate": 5.294736842105263e-05, "loss": 1.495, "step": 100 }, { "epoch": 0.00991866693116445, "eval_loss": 2.0559380054473877, "eval_runtime": 452.8342, "eval_samples_per_second": 9.374, "eval_steps_per_second": 2.345, "step": 100 }, { "epoch": 0.010017853600476096, "grad_norm": 0.354360431432724, "learning_rate": 5.24178947368421e-05, "loss": 1.8484, "step": 101 }, { "epoch": 0.01011704026978774, "grad_norm": 0.3903191387653351, "learning_rate": 5.1888421052631585e-05, "loss": 2.089, "step": 102 }, { "epoch": 0.010216226939099385, "grad_norm": 0.4185899794101715, "learning_rate": 5.135894736842106e-05, "loss": 2.0572, "step": 103 }, { "epoch": 0.010315413608411029, "grad_norm": 0.3941899538040161, "learning_rate": 5.082947368421053e-05, "loss": 2.1271, "step": 104 }, { "epoch": 0.010414600277722674, "grad_norm": 0.42219480872154236, "learning_rate": 5.03e-05, "loss": 2.0565, "step": 105 }, { "epoch": 0.010513786947034318, "grad_norm": 0.45150065422058105, "learning_rate": 4.977052631578947e-05, "loss": 2.2626, "step": 106 }, { "epoch": 0.010612973616345963, "grad_norm": 0.42973950505256653, "learning_rate": 4.924105263157895e-05, "loss": 2.1764, "step": 107 }, { "epoch": 0.010712160285657607, "grad_norm": 0.48724910616874695, "learning_rate": 4.871157894736843e-05, "loss": 2.239, "step": 108 }, { "epoch": 0.010811346954969252, "grad_norm": 0.40696197748184204, "learning_rate": 4.81821052631579e-05, "loss": 2.157, "step": 109 }, { "epoch": 0.010910533624280896, "grad_norm": 0.43805986642837524, "learning_rate": 4.765263157894737e-05, "loss": 2.1515, "step": 110 }, { "epoch": 0.011009720293592542, "grad_norm": 0.44969242811203003, "learning_rate": 4.7123157894736845e-05, "loss": 2.1633, "step": 111 }, { "epoch": 0.011108906962904185, "grad_norm": 0.45557573437690735, "learning_rate": 4.6593684210526316e-05, "loss": 2.1757, "step": 112 }, { "epoch": 0.01120809363221583, "grad_norm": 0.4493902921676636, "learning_rate": 4.606421052631579e-05, "loss": 2.0955, "step": 113 }, { "epoch": 0.011307280301527474, "grad_norm": 0.4411601424217224, "learning_rate": 4.553473684210527e-05, "loss": 2.3974, "step": 114 }, { "epoch": 0.01140646697083912, "grad_norm": 0.4536522924900055, "learning_rate": 4.500526315789474e-05, "loss": 2.0787, "step": 115 }, { "epoch": 0.011505653640150763, "grad_norm": 0.49566513299942017, "learning_rate": 4.447578947368421e-05, "loss": 2.0914, "step": 116 }, { "epoch": 0.011604840309462409, "grad_norm": 0.44597601890563965, "learning_rate": 4.394631578947369e-05, "loss": 2.0562, "step": 117 }, { "epoch": 0.011704026978774052, "grad_norm": 0.4513428807258606, "learning_rate": 4.341684210526316e-05, "loss": 2.0668, "step": 118 }, { "epoch": 0.011803213648085698, "grad_norm": 0.42738619446754456, "learning_rate": 4.2887368421052636e-05, "loss": 2.0592, "step": 119 }, { "epoch": 0.011902400317397342, "grad_norm": 0.4794227182865143, "learning_rate": 4.2357894736842106e-05, "loss": 1.8783, "step": 120 }, { "epoch": 0.012001586986708987, "grad_norm": 0.4853605628013611, "learning_rate": 4.182842105263158e-05, "loss": 2.1364, "step": 121 }, { "epoch": 0.01210077365602063, "grad_norm": 0.4783886969089508, "learning_rate": 4.1298947368421053e-05, "loss": 1.9871, "step": 122 }, { "epoch": 0.012199960325332276, "grad_norm": 0.5324634313583374, "learning_rate": 4.0769473684210524e-05, "loss": 2.1027, "step": 123 }, { "epoch": 0.01229914699464392, "grad_norm": 0.5017367005348206, "learning_rate": 4.024000000000001e-05, "loss": 2.2615, "step": 124 }, { "epoch": 0.012398333663955565, "grad_norm": 0.5131402015686035, "learning_rate": 3.971052631578948e-05, "loss": 2.2073, "step": 125 }, { "epoch": 0.012497520333267209, "grad_norm": 0.5473915338516235, "learning_rate": 3.918105263157895e-05, "loss": 2.1229, "step": 126 }, { "epoch": 0.012596707002578854, "grad_norm": 0.503765881061554, "learning_rate": 3.865157894736842e-05, "loss": 2.056, "step": 127 }, { "epoch": 0.012695893671890498, "grad_norm": 0.5175101161003113, "learning_rate": 3.8122105263157896e-05, "loss": 2.0696, "step": 128 }, { "epoch": 0.012795080341202143, "grad_norm": 0.5564276576042175, "learning_rate": 3.759263157894737e-05, "loss": 1.9024, "step": 129 }, { "epoch": 0.012894267010513787, "grad_norm": 0.6425966024398804, "learning_rate": 3.7063157894736844e-05, "loss": 2.1861, "step": 130 }, { "epoch": 0.01299345367982543, "grad_norm": 0.5245392322540283, "learning_rate": 3.653368421052632e-05, "loss": 2.1792, "step": 131 }, { "epoch": 0.013092640349137076, "grad_norm": 0.5750947594642639, "learning_rate": 3.600421052631579e-05, "loss": 2.2775, "step": 132 }, { "epoch": 0.01319182701844872, "grad_norm": 0.6149414777755737, "learning_rate": 3.547473684210526e-05, "loss": 2.1948, "step": 133 }, { "epoch": 0.013291013687760365, "grad_norm": 0.5776863694190979, "learning_rate": 3.494526315789474e-05, "loss": 2.0224, "step": 134 }, { "epoch": 0.013390200357072009, "grad_norm": 0.5697036385536194, "learning_rate": 3.4415789473684216e-05, "loss": 2.247, "step": 135 }, { "epoch": 0.013489387026383654, "grad_norm": 0.6303629279136658, "learning_rate": 3.3886315789473686e-05, "loss": 2.1261, "step": 136 }, { "epoch": 0.013588573695695298, "grad_norm": 0.6330528259277344, "learning_rate": 3.3356842105263156e-05, "loss": 2.0778, "step": 137 }, { "epoch": 0.013687760365006943, "grad_norm": 0.6554325222969055, "learning_rate": 3.2827368421052634e-05, "loss": 2.2772, "step": 138 }, { "epoch": 0.013786947034318587, "grad_norm": 0.6328533291816711, "learning_rate": 3.2297894736842104e-05, "loss": 2.1124, "step": 139 }, { "epoch": 0.013886133703630232, "grad_norm": 0.6250814199447632, "learning_rate": 3.176842105263158e-05, "loss": 2.1146, "step": 140 }, { "epoch": 0.013985320372941876, "grad_norm": 0.7211160063743591, "learning_rate": 3.123894736842106e-05, "loss": 1.9403, "step": 141 }, { "epoch": 0.014084507042253521, "grad_norm": 0.6549091339111328, "learning_rate": 3.070947368421053e-05, "loss": 2.0405, "step": 142 }, { "epoch": 0.014183693711565165, "grad_norm": 0.7072272300720215, "learning_rate": 3.018e-05, "loss": 2.0984, "step": 143 }, { "epoch": 0.01428288038087681, "grad_norm": 0.7073689103126526, "learning_rate": 2.9650526315789473e-05, "loss": 1.8046, "step": 144 }, { "epoch": 0.014382067050188454, "grad_norm": 0.7743393182754517, "learning_rate": 2.912105263157895e-05, "loss": 1.871, "step": 145 }, { "epoch": 0.0144812537195001, "grad_norm": 0.8098898530006409, "learning_rate": 2.8591578947368424e-05, "loss": 1.9678, "step": 146 }, { "epoch": 0.014580440388811743, "grad_norm": 0.7993214130401611, "learning_rate": 2.8062105263157894e-05, "loss": 1.9601, "step": 147 }, { "epoch": 0.014679627058123389, "grad_norm": 0.9647625684738159, "learning_rate": 2.753263157894737e-05, "loss": 1.7944, "step": 148 }, { "epoch": 0.014778813727435032, "grad_norm": 1.02372145652771, "learning_rate": 2.7003157894736845e-05, "loss": 1.7146, "step": 149 }, { "epoch": 0.014878000396746678, "grad_norm": 1.2415416240692139, "learning_rate": 2.6473684210526315e-05, "loss": 1.5707, "step": 150 }, { "epoch": 0.014878000396746678, "eval_loss": 2.04632830619812, "eval_runtime": 452.9007, "eval_samples_per_second": 9.373, "eval_steps_per_second": 2.345, "step": 150 }, { "epoch": 0.014977187066058321, "grad_norm": 0.27521196007728577, "learning_rate": 2.5944210526315793e-05, "loss": 1.3203, "step": 151 }, { "epoch": 0.015076373735369967, "grad_norm": 0.31621789932250977, "learning_rate": 2.5414736842105266e-05, "loss": 1.8038, "step": 152 }, { "epoch": 0.01517556040468161, "grad_norm": 0.3353915214538574, "learning_rate": 2.4885263157894737e-05, "loss": 1.8907, "step": 153 }, { "epoch": 0.015274747073993256, "grad_norm": 0.36678266525268555, "learning_rate": 2.4355789473684214e-05, "loss": 2.2098, "step": 154 }, { "epoch": 0.0153739337433049, "grad_norm": 0.3651160001754761, "learning_rate": 2.3826315789473684e-05, "loss": 1.8027, "step": 155 }, { "epoch": 0.015473120412616545, "grad_norm": 0.3704912066459656, "learning_rate": 2.3296842105263158e-05, "loss": 2.0844, "step": 156 }, { "epoch": 0.015572307081928189, "grad_norm": 0.5724167227745056, "learning_rate": 2.2767368421052635e-05, "loss": 1.8522, "step": 157 }, { "epoch": 0.015671493751239832, "grad_norm": 0.44060230255126953, "learning_rate": 2.2237894736842105e-05, "loss": 2.2058, "step": 158 }, { "epoch": 0.015770680420551478, "grad_norm": 0.40229400992393494, "learning_rate": 2.170842105263158e-05, "loss": 2.1191, "step": 159 }, { "epoch": 0.015869867089863123, "grad_norm": 0.42183592915534973, "learning_rate": 2.1178947368421053e-05, "loss": 2.2083, "step": 160 }, { "epoch": 0.01596905375917477, "grad_norm": 0.45579245686531067, "learning_rate": 2.0649473684210527e-05, "loss": 2.1238, "step": 161 }, { "epoch": 0.01606824042848641, "grad_norm": 0.44921940565109253, "learning_rate": 2.0120000000000004e-05, "loss": 2.0088, "step": 162 }, { "epoch": 0.016167427097798056, "grad_norm": 0.4502014219760895, "learning_rate": 1.9590526315789474e-05, "loss": 2.0574, "step": 163 }, { "epoch": 0.0162666137671097, "grad_norm": 0.45939356088638306, "learning_rate": 1.9061052631578948e-05, "loss": 2.0133, "step": 164 }, { "epoch": 0.016365800436421343, "grad_norm": 0.4568996727466583, "learning_rate": 1.8531578947368422e-05, "loss": 1.9689, "step": 165 }, { "epoch": 0.01646498710573299, "grad_norm": 0.4249691367149353, "learning_rate": 1.8002105263157896e-05, "loss": 2.0251, "step": 166 }, { "epoch": 0.016564173775044634, "grad_norm": 0.47442981600761414, "learning_rate": 1.747263157894737e-05, "loss": 2.097, "step": 167 }, { "epoch": 0.01666336044435628, "grad_norm": 0.456521600484848, "learning_rate": 1.6943157894736843e-05, "loss": 2.0002, "step": 168 }, { "epoch": 0.01676254711366792, "grad_norm": 0.4548923075199127, "learning_rate": 1.6413684210526317e-05, "loss": 2.1861, "step": 169 }, { "epoch": 0.016861733782979567, "grad_norm": 0.46176818013191223, "learning_rate": 1.588421052631579e-05, "loss": 1.8373, "step": 170 }, { "epoch": 0.016960920452291212, "grad_norm": 0.4893608093261719, "learning_rate": 1.5354736842105264e-05, "loss": 2.0277, "step": 171 }, { "epoch": 0.017060107121602858, "grad_norm": 0.4989417493343353, "learning_rate": 1.4825263157894736e-05, "loss": 2.1915, "step": 172 }, { "epoch": 0.0171592937909145, "grad_norm": 0.490999698638916, "learning_rate": 1.4295789473684212e-05, "loss": 2.1797, "step": 173 }, { "epoch": 0.017258480460226145, "grad_norm": 0.4978395104408264, "learning_rate": 1.3766315789473686e-05, "loss": 2.067, "step": 174 }, { "epoch": 0.01735766712953779, "grad_norm": 0.4830182194709778, "learning_rate": 1.3236842105263158e-05, "loss": 2.1106, "step": 175 }, { "epoch": 0.017456853798849436, "grad_norm": 0.5352585911750793, "learning_rate": 1.2707368421052633e-05, "loss": 2.2133, "step": 176 }, { "epoch": 0.017556040468161078, "grad_norm": 0.493240088224411, "learning_rate": 1.2177894736842107e-05, "loss": 1.895, "step": 177 }, { "epoch": 0.017655227137472723, "grad_norm": 0.5840253233909607, "learning_rate": 1.1648421052631579e-05, "loss": 2.1145, "step": 178 }, { "epoch": 0.01775441380678437, "grad_norm": 0.5503016710281372, "learning_rate": 1.1118947368421053e-05, "loss": 2.1405, "step": 179 }, { "epoch": 0.017853600476096014, "grad_norm": 0.563595712184906, "learning_rate": 1.0589473684210526e-05, "loss": 2.1998, "step": 180 }, { "epoch": 0.017952787145407656, "grad_norm": 0.5283192992210388, "learning_rate": 1.0060000000000002e-05, "loss": 1.8799, "step": 181 }, { "epoch": 0.0180519738147193, "grad_norm": 0.6128177642822266, "learning_rate": 9.530526315789474e-06, "loss": 2.149, "step": 182 }, { "epoch": 0.018151160484030947, "grad_norm": 0.5376093983650208, "learning_rate": 9.001052631578948e-06, "loss": 2.0083, "step": 183 }, { "epoch": 0.018250347153342592, "grad_norm": 0.5802195072174072, "learning_rate": 8.471578947368422e-06, "loss": 2.2071, "step": 184 }, { "epoch": 0.018349533822654234, "grad_norm": 0.5402345061302185, "learning_rate": 7.942105263157895e-06, "loss": 2.171, "step": 185 }, { "epoch": 0.01844872049196588, "grad_norm": 0.6071547269821167, "learning_rate": 7.412631578947368e-06, "loss": 2.1522, "step": 186 }, { "epoch": 0.018547907161277525, "grad_norm": 0.5930723547935486, "learning_rate": 6.883157894736843e-06, "loss": 2.2813, "step": 187 }, { "epoch": 0.01864709383058917, "grad_norm": 0.5779694318771362, "learning_rate": 6.3536842105263166e-06, "loss": 2.0468, "step": 188 }, { "epoch": 0.018746280499900812, "grad_norm": 0.6479222178459167, "learning_rate": 5.8242105263157895e-06, "loss": 2.2584, "step": 189 }, { "epoch": 0.018845467169212458, "grad_norm": 0.6813433170318604, "learning_rate": 5.294736842105263e-06, "loss": 1.9731, "step": 190 }, { "epoch": 0.018944653838524103, "grad_norm": 0.5957227945327759, "learning_rate": 4.765263157894737e-06, "loss": 2.0371, "step": 191 }, { "epoch": 0.01904384050783575, "grad_norm": 0.7192276120185852, "learning_rate": 4.235789473684211e-06, "loss": 2.041, "step": 192 }, { "epoch": 0.01914302717714739, "grad_norm": 0.7070173025131226, "learning_rate": 3.706315789473684e-06, "loss": 2.2341, "step": 193 }, { "epoch": 0.019242213846459036, "grad_norm": 0.7068848013877869, "learning_rate": 3.1768421052631583e-06, "loss": 2.0895, "step": 194 }, { "epoch": 0.01934140051577068, "grad_norm": 0.7108703851699829, "learning_rate": 2.6473684210526316e-06, "loss": 1.7679, "step": 195 }, { "epoch": 0.019440587185082323, "grad_norm": 0.7898332476615906, "learning_rate": 2.1178947368421054e-06, "loss": 1.9642, "step": 196 }, { "epoch": 0.01953977385439397, "grad_norm": 1.223617672920227, "learning_rate": 1.5884210526315791e-06, "loss": 2.1923, "step": 197 }, { "epoch": 0.019638960523705614, "grad_norm": 0.9150407314300537, "learning_rate": 1.0589473684210527e-06, "loss": 2.0014, "step": 198 }, { "epoch": 0.01973814719301726, "grad_norm": 0.9921555519104004, "learning_rate": 5.294736842105263e-07, "loss": 1.6767, "step": 199 }, { "epoch": 0.0198373338623289, "grad_norm": 1.3139691352844238, "learning_rate": 0.0, "loss": 1.665, "step": 200 }, { "epoch": 0.0198373338623289, "eval_loss": 2.045283317565918, "eval_runtime": 453.3485, "eval_samples_per_second": 9.364, "eval_steps_per_second": 2.343, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.1094434267136e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }