{ "best_metric": 1.6460583209991455, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.08525149190110827, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005683432793407218, "grad_norm": 0.3731946051120758, "learning_rate": 1.018e-05, "loss": 1.6671, "step": 1 }, { "epoch": 0.0005683432793407218, "eval_loss": 1.9569587707519531, "eval_runtime": 37.4837, "eval_samples_per_second": 19.769, "eval_steps_per_second": 4.962, "step": 1 }, { "epoch": 0.0011366865586814436, "grad_norm": 0.8963428735733032, "learning_rate": 2.036e-05, "loss": 1.5645, "step": 2 }, { "epoch": 0.0017050298380221654, "grad_norm": 0.4044896364212036, "learning_rate": 3.0539999999999996e-05, "loss": 1.5765, "step": 3 }, { "epoch": 0.002273373117362887, "grad_norm": 0.4547571539878845, "learning_rate": 4.072e-05, "loss": 1.776, "step": 4 }, { "epoch": 0.002841716396703609, "grad_norm": 0.49056270718574524, "learning_rate": 5.09e-05, "loss": 1.6584, "step": 5 }, { "epoch": 0.0034100596760443308, "grad_norm": 0.5482068061828613, "learning_rate": 6.107999999999999e-05, "loss": 1.6111, "step": 6 }, { "epoch": 0.003978402955385053, "grad_norm": 0.5801917314529419, "learning_rate": 7.125999999999999e-05, "loss": 1.9129, "step": 7 }, { "epoch": 0.004546746234725774, "grad_norm": 0.6095445156097412, "learning_rate": 8.144e-05, "loss": 1.6491, "step": 8 }, { "epoch": 0.005115089514066497, "grad_norm": 0.716426432132721, "learning_rate": 9.162e-05, "loss": 1.7432, "step": 9 }, { "epoch": 0.005683432793407218, "grad_norm": 0.7395761609077454, "learning_rate": 0.0001018, "loss": 1.4594, "step": 10 }, { "epoch": 0.00625177607274794, "grad_norm": 0.6633386015892029, "learning_rate": 0.00010126421052631578, "loss": 1.687, "step": 11 }, { "epoch": 0.0068201193520886615, "grad_norm": 2.7426133155822754, "learning_rate": 0.00010072842105263156, "loss": 1.6098, "step": 12 }, { "epoch": 0.007388462631429384, "grad_norm": 0.6971986293792725, "learning_rate": 0.00010019263157894736, "loss": 1.7098, "step": 13 }, { "epoch": 0.007956805910770106, "grad_norm": 0.7126003503799438, "learning_rate": 9.965684210526316e-05, "loss": 1.6745, "step": 14 }, { "epoch": 0.008525149190110827, "grad_norm": 0.7513856887817383, "learning_rate": 9.912105263157895e-05, "loss": 1.8218, "step": 15 }, { "epoch": 0.009093492469451549, "grad_norm": 0.8330377340316772, "learning_rate": 9.858526315789473e-05, "loss": 1.6449, "step": 16 }, { "epoch": 0.00966183574879227, "grad_norm": 0.7440202832221985, "learning_rate": 9.804947368421052e-05, "loss": 1.5405, "step": 17 }, { "epoch": 0.010230179028132993, "grad_norm": 0.800340473651886, "learning_rate": 9.75136842105263e-05, "loss": 1.5846, "step": 18 }, { "epoch": 0.010798522307473715, "grad_norm": 0.8122618198394775, "learning_rate": 9.69778947368421e-05, "loss": 1.6476, "step": 19 }, { "epoch": 0.011366865586814436, "grad_norm": 0.7880124449729919, "learning_rate": 9.644210526315789e-05, "loss": 1.7152, "step": 20 }, { "epoch": 0.011935208866155157, "grad_norm": 0.830104649066925, "learning_rate": 9.590631578947369e-05, "loss": 1.6601, "step": 21 }, { "epoch": 0.01250355214549588, "grad_norm": 1.2284358739852905, "learning_rate": 9.537052631578947e-05, "loss": 1.7889, "step": 22 }, { "epoch": 0.013071895424836602, "grad_norm": 0.9070674777030945, "learning_rate": 9.483473684210526e-05, "loss": 1.6175, "step": 23 }, { "epoch": 0.013640238704177323, "grad_norm": 0.9066914916038513, "learning_rate": 9.429894736842104e-05, "loss": 1.6459, "step": 24 }, { "epoch": 0.014208581983518044, "grad_norm": 2.1840057373046875, "learning_rate": 9.376315789473684e-05, "loss": 1.6321, "step": 25 }, { "epoch": 0.014776925262858768, "grad_norm": 1.0758976936340332, "learning_rate": 9.322736842105262e-05, "loss": 1.8035, "step": 26 }, { "epoch": 0.015345268542199489, "grad_norm": 0.9917031526565552, "learning_rate": 9.269157894736842e-05, "loss": 1.7351, "step": 27 }, { "epoch": 0.015913611821540212, "grad_norm": 0.9799713492393494, "learning_rate": 9.215578947368421e-05, "loss": 1.5575, "step": 28 }, { "epoch": 0.01648195510088093, "grad_norm": 1.1645103693008423, "learning_rate": 9.162e-05, "loss": 1.3628, "step": 29 }, { "epoch": 0.017050298380221655, "grad_norm": 1.0407999753952026, "learning_rate": 9.108421052631578e-05, "loss": 1.5246, "step": 30 }, { "epoch": 0.017618641659562374, "grad_norm": 1.0878431797027588, "learning_rate": 9.054842105263158e-05, "loss": 1.7921, "step": 31 }, { "epoch": 0.018186984938903097, "grad_norm": 1.138879418373108, "learning_rate": 9.001263157894736e-05, "loss": 1.801, "step": 32 }, { "epoch": 0.01875532821824382, "grad_norm": 1.1667633056640625, "learning_rate": 8.947684210526315e-05, "loss": 1.742, "step": 33 }, { "epoch": 0.01932367149758454, "grad_norm": 1.2917413711547852, "learning_rate": 8.894105263157895e-05, "loss": 1.6671, "step": 34 }, { "epoch": 0.019892014776925263, "grad_norm": 1.376717209815979, "learning_rate": 8.840526315789473e-05, "loss": 1.3901, "step": 35 }, { "epoch": 0.020460358056265986, "grad_norm": 1.3317391872406006, "learning_rate": 8.786947368421052e-05, "loss": 1.6386, "step": 36 }, { "epoch": 0.021028701335606706, "grad_norm": 1.414566993713379, "learning_rate": 8.733368421052632e-05, "loss": 1.65, "step": 37 }, { "epoch": 0.02159704461494743, "grad_norm": 1.6730046272277832, "learning_rate": 8.67978947368421e-05, "loss": 1.7057, "step": 38 }, { "epoch": 0.02216538789428815, "grad_norm": 1.3370819091796875, "learning_rate": 8.626210526315789e-05, "loss": 1.269, "step": 39 }, { "epoch": 0.022733731173628872, "grad_norm": 2.1311228275299072, "learning_rate": 8.572631578947367e-05, "loss": 1.631, "step": 40 }, { "epoch": 0.023302074452969595, "grad_norm": 1.6010485887527466, "learning_rate": 8.519052631578947e-05, "loss": 1.4852, "step": 41 }, { "epoch": 0.023870417732310314, "grad_norm": 1.5216920375823975, "learning_rate": 8.465473684210527e-05, "loss": 1.0266, "step": 42 }, { "epoch": 0.024438761011651038, "grad_norm": 1.7472769021987915, "learning_rate": 8.411894736842105e-05, "loss": 1.71, "step": 43 }, { "epoch": 0.02500710429099176, "grad_norm": 2.0321545600891113, "learning_rate": 8.358315789473684e-05, "loss": 2.0073, "step": 44 }, { "epoch": 0.02557544757033248, "grad_norm": 1.5551754236221313, "learning_rate": 8.304736842105262e-05, "loss": 1.6646, "step": 45 }, { "epoch": 0.026143790849673203, "grad_norm": 1.6872512102127075, "learning_rate": 8.251157894736841e-05, "loss": 1.4299, "step": 46 }, { "epoch": 0.026712134129013923, "grad_norm": 3.1502418518066406, "learning_rate": 8.197578947368421e-05, "loss": 2.1225, "step": 47 }, { "epoch": 0.027280477408354646, "grad_norm": 2.6411232948303223, "learning_rate": 8.144e-05, "loss": 2.2605, "step": 48 }, { "epoch": 0.02784882068769537, "grad_norm": 2.905674934387207, "learning_rate": 8.090421052631579e-05, "loss": 1.9003, "step": 49 }, { "epoch": 0.02841716396703609, "grad_norm": 3.372375965118408, "learning_rate": 8.036842105263158e-05, "loss": 1.8968, "step": 50 }, { "epoch": 0.02841716396703609, "eval_loss": 1.798862338066101, "eval_runtime": 37.6661, "eval_samples_per_second": 19.673, "eval_steps_per_second": 4.938, "step": 50 }, { "epoch": 0.028985507246376812, "grad_norm": 0.7775370478630066, "learning_rate": 7.983263157894736e-05, "loss": 1.8201, "step": 51 }, { "epoch": 0.029553850525717535, "grad_norm": 0.7624815702438354, "learning_rate": 7.929684210526315e-05, "loss": 1.6735, "step": 52 }, { "epoch": 0.030122193805058255, "grad_norm": 0.6627716422080994, "learning_rate": 7.876105263157895e-05, "loss": 1.7316, "step": 53 }, { "epoch": 0.030690537084398978, "grad_norm": 0.5327397584915161, "learning_rate": 7.822526315789473e-05, "loss": 1.4878, "step": 54 }, { "epoch": 0.0312588803637397, "grad_norm": 0.5863519906997681, "learning_rate": 7.768947368421053e-05, "loss": 1.7539, "step": 55 }, { "epoch": 0.031827223643080424, "grad_norm": 0.5345349907875061, "learning_rate": 7.715368421052631e-05, "loss": 1.7121, "step": 56 }, { "epoch": 0.03239556692242114, "grad_norm": 0.49727949500083923, "learning_rate": 7.66178947368421e-05, "loss": 1.5745, "step": 57 }, { "epoch": 0.03296391020176186, "grad_norm": 0.5512779355049133, "learning_rate": 7.608210526315788e-05, "loss": 1.6066, "step": 58 }, { "epoch": 0.033532253481102586, "grad_norm": 0.5462706089019775, "learning_rate": 7.554631578947368e-05, "loss": 1.5929, "step": 59 }, { "epoch": 0.03410059676044331, "grad_norm": 0.5814910531044006, "learning_rate": 7.501052631578947e-05, "loss": 1.4255, "step": 60 }, { "epoch": 0.03466894003978403, "grad_norm": 0.5507473349571228, "learning_rate": 7.447473684210527e-05, "loss": 1.6272, "step": 61 }, { "epoch": 0.03523728331912475, "grad_norm": 0.6342865824699402, "learning_rate": 7.393894736842105e-05, "loss": 1.6846, "step": 62 }, { "epoch": 0.03580562659846547, "grad_norm": 0.6820300221443176, "learning_rate": 7.340315789473684e-05, "loss": 1.7388, "step": 63 }, { "epoch": 0.036373969877806195, "grad_norm": 0.6083571314811707, "learning_rate": 7.286736842105262e-05, "loss": 1.6918, "step": 64 }, { "epoch": 0.03694231315714692, "grad_norm": 0.7497929334640503, "learning_rate": 7.233157894736842e-05, "loss": 1.5324, "step": 65 }, { "epoch": 0.03751065643648764, "grad_norm": 0.6015780568122864, "learning_rate": 7.179578947368421e-05, "loss": 1.5238, "step": 66 }, { "epoch": 0.03807899971582836, "grad_norm": 0.6692572832107544, "learning_rate": 7.125999999999999e-05, "loss": 1.776, "step": 67 }, { "epoch": 0.03864734299516908, "grad_norm": 0.7358421683311462, "learning_rate": 7.072421052631579e-05, "loss": 1.6562, "step": 68 }, { "epoch": 0.0392156862745098, "grad_norm": 0.6718075275421143, "learning_rate": 7.018842105263158e-05, "loss": 1.6689, "step": 69 }, { "epoch": 0.039784029553850526, "grad_norm": 0.7654300928115845, "learning_rate": 6.965263157894736e-05, "loss": 1.7151, "step": 70 }, { "epoch": 0.04035237283319125, "grad_norm": 0.786693274974823, "learning_rate": 6.911684210526316e-05, "loss": 1.4008, "step": 71 }, { "epoch": 0.04092071611253197, "grad_norm": 0.7517386674880981, "learning_rate": 6.858105263157894e-05, "loss": 1.6763, "step": 72 }, { "epoch": 0.04148905939187269, "grad_norm": 0.7885333895683289, "learning_rate": 6.804526315789473e-05, "loss": 1.5398, "step": 73 }, { "epoch": 0.04205740267121341, "grad_norm": 0.8720977306365967, "learning_rate": 6.750947368421052e-05, "loss": 1.792, "step": 74 }, { "epoch": 0.042625745950554135, "grad_norm": 0.8613812923431396, "learning_rate": 6.697368421052631e-05, "loss": 1.6151, "step": 75 }, { "epoch": 0.04319408922989486, "grad_norm": 1.1323267221450806, "learning_rate": 6.64378947368421e-05, "loss": 1.7411, "step": 76 }, { "epoch": 0.04376243250923558, "grad_norm": 0.8913275003433228, "learning_rate": 6.59021052631579e-05, "loss": 1.8418, "step": 77 }, { "epoch": 0.0443307757885763, "grad_norm": 0.9520891308784485, "learning_rate": 6.536631578947368e-05, "loss": 1.6057, "step": 78 }, { "epoch": 0.04489911906791702, "grad_norm": 0.9143445491790771, "learning_rate": 6.483052631578947e-05, "loss": 1.2231, "step": 79 }, { "epoch": 0.045467462347257744, "grad_norm": 0.9965983629226685, "learning_rate": 6.429473684210525e-05, "loss": 1.6253, "step": 80 }, { "epoch": 0.04603580562659847, "grad_norm": 0.9294396042823792, "learning_rate": 6.375894736842104e-05, "loss": 1.9669, "step": 81 }, { "epoch": 0.04660414890593919, "grad_norm": 0.9775664210319519, "learning_rate": 6.322315789473684e-05, "loss": 1.4699, "step": 82 }, { "epoch": 0.047172492185279906, "grad_norm": 1.1528264284133911, "learning_rate": 6.268736842105264e-05, "loss": 1.6645, "step": 83 }, { "epoch": 0.04774083546462063, "grad_norm": 1.1889389753341675, "learning_rate": 6.215157894736842e-05, "loss": 1.4175, "step": 84 }, { "epoch": 0.04830917874396135, "grad_norm": 1.1938812732696533, "learning_rate": 6.16157894736842e-05, "loss": 1.4217, "step": 85 }, { "epoch": 0.048877522023302075, "grad_norm": 1.2877745628356934, "learning_rate": 6.107999999999999e-05, "loss": 1.501, "step": 86 }, { "epoch": 0.0494458653026428, "grad_norm": 1.493168830871582, "learning_rate": 6.054421052631578e-05, "loss": 1.6804, "step": 87 }, { "epoch": 0.05001420858198352, "grad_norm": 1.2537355422973633, "learning_rate": 6.000842105263157e-05, "loss": 1.5172, "step": 88 }, { "epoch": 0.05058255186132424, "grad_norm": 1.3163360357284546, "learning_rate": 5.947263157894737e-05, "loss": 1.5297, "step": 89 }, { "epoch": 0.05115089514066496, "grad_norm": 1.7828099727630615, "learning_rate": 5.893684210526316e-05, "loss": 1.8455, "step": 90 }, { "epoch": 0.051719238420005684, "grad_norm": 1.7339277267456055, "learning_rate": 5.8401052631578944e-05, "loss": 1.3947, "step": 91 }, { "epoch": 0.05228758169934641, "grad_norm": 1.4898391962051392, "learning_rate": 5.7865263157894736e-05, "loss": 1.2916, "step": 92 }, { "epoch": 0.05285592497868713, "grad_norm": 2.050482988357544, "learning_rate": 5.732947368421052e-05, "loss": 1.8793, "step": 93 }, { "epoch": 0.053424268258027846, "grad_norm": 1.588317632675171, "learning_rate": 5.6793684210526306e-05, "loss": 1.2028, "step": 94 }, { "epoch": 0.05399261153736857, "grad_norm": 1.3720221519470215, "learning_rate": 5.6257894736842105e-05, "loss": 1.1236, "step": 95 }, { "epoch": 0.05456095481670929, "grad_norm": 1.8759979009628296, "learning_rate": 5.57221052631579e-05, "loss": 1.4637, "step": 96 }, { "epoch": 0.055129298096050015, "grad_norm": 2.030932903289795, "learning_rate": 5.518631578947368e-05, "loss": 1.3648, "step": 97 }, { "epoch": 0.05569764137539074, "grad_norm": 2.7365996837615967, "learning_rate": 5.4650526315789474e-05, "loss": 1.8048, "step": 98 }, { "epoch": 0.056265984654731455, "grad_norm": 2.0562963485717773, "learning_rate": 5.411473684210526e-05, "loss": 1.6264, "step": 99 }, { "epoch": 0.05683432793407218, "grad_norm": 4.328183174133301, "learning_rate": 5.3578947368421044e-05, "loss": 2.3232, "step": 100 }, { "epoch": 0.05683432793407218, "eval_loss": 1.7202425003051758, "eval_runtime": 37.4994, "eval_samples_per_second": 19.76, "eval_steps_per_second": 4.96, "step": 100 }, { "epoch": 0.0574026712134129, "grad_norm": 0.5151413083076477, "learning_rate": 5.3043157894736836e-05, "loss": 1.817, "step": 101 }, { "epoch": 0.057971014492753624, "grad_norm": 0.5496950745582581, "learning_rate": 5.2507368421052635e-05, "loss": 1.6748, "step": 102 }, { "epoch": 0.05853935777209435, "grad_norm": 0.6355785727500916, "learning_rate": 5.197157894736842e-05, "loss": 1.7064, "step": 103 }, { "epoch": 0.05910770105143507, "grad_norm": 0.593001127243042, "learning_rate": 5.143578947368421e-05, "loss": 1.7486, "step": 104 }, { "epoch": 0.059676044330775786, "grad_norm": 0.5528516173362732, "learning_rate": 5.09e-05, "loss": 1.7127, "step": 105 }, { "epoch": 0.06024438761011651, "grad_norm": 0.5531303286552429, "learning_rate": 5.036421052631578e-05, "loss": 1.8465, "step": 106 }, { "epoch": 0.06081273088945723, "grad_norm": 0.5014846324920654, "learning_rate": 4.982842105263158e-05, "loss": 1.6323, "step": 107 }, { "epoch": 0.061381074168797956, "grad_norm": 0.5050337910652161, "learning_rate": 4.9292631578947366e-05, "loss": 1.5039, "step": 108 }, { "epoch": 0.06194941744813868, "grad_norm": 0.5129539370536804, "learning_rate": 4.875684210526315e-05, "loss": 1.6314, "step": 109 }, { "epoch": 0.0625177607274794, "grad_norm": 0.5408539175987244, "learning_rate": 4.822105263157894e-05, "loss": 1.6182, "step": 110 }, { "epoch": 0.06308610400682012, "grad_norm": 0.5475476384162903, "learning_rate": 4.7685263157894735e-05, "loss": 1.4697, "step": 111 }, { "epoch": 0.06365444728616085, "grad_norm": 0.5478246808052063, "learning_rate": 4.714947368421052e-05, "loss": 1.5738, "step": 112 }, { "epoch": 0.06422279056550156, "grad_norm": 0.5796051621437073, "learning_rate": 4.661368421052631e-05, "loss": 1.5052, "step": 113 }, { "epoch": 0.06479113384484228, "grad_norm": 0.5977994799613953, "learning_rate": 4.6077894736842104e-05, "loss": 1.526, "step": 114 }, { "epoch": 0.06535947712418301, "grad_norm": 0.5993626713752747, "learning_rate": 4.554210526315789e-05, "loss": 1.4977, "step": 115 }, { "epoch": 0.06592782040352373, "grad_norm": 0.6900733113288879, "learning_rate": 4.500631578947368e-05, "loss": 1.6882, "step": 116 }, { "epoch": 0.06649616368286446, "grad_norm": 0.6961638331413269, "learning_rate": 4.447052631578947e-05, "loss": 1.5659, "step": 117 }, { "epoch": 0.06706450696220517, "grad_norm": 0.6532340049743652, "learning_rate": 4.393473684210526e-05, "loss": 1.7786, "step": 118 }, { "epoch": 0.06763285024154589, "grad_norm": 0.7710056900978088, "learning_rate": 4.339894736842105e-05, "loss": 1.7923, "step": 119 }, { "epoch": 0.06820119352088662, "grad_norm": 0.6924229264259338, "learning_rate": 4.2863157894736835e-05, "loss": 1.5977, "step": 120 }, { "epoch": 0.06876953680022733, "grad_norm": 0.7953075170516968, "learning_rate": 4.2327368421052634e-05, "loss": 1.4162, "step": 121 }, { "epoch": 0.06933788007956806, "grad_norm": 0.6938318014144897, "learning_rate": 4.179157894736842e-05, "loss": 1.4258, "step": 122 }, { "epoch": 0.06990622335890878, "grad_norm": 0.7798824310302734, "learning_rate": 4.1255789473684204e-05, "loss": 1.4765, "step": 123 }, { "epoch": 0.0704745666382495, "grad_norm": 0.8481230139732361, "learning_rate": 4.072e-05, "loss": 1.4541, "step": 124 }, { "epoch": 0.07104290991759023, "grad_norm": 0.7436756491661072, "learning_rate": 4.018421052631579e-05, "loss": 1.6289, "step": 125 }, { "epoch": 0.07161125319693094, "grad_norm": 0.802385151386261, "learning_rate": 3.9648421052631573e-05, "loss": 1.4059, "step": 126 }, { "epoch": 0.07217959647627167, "grad_norm": 0.9276831150054932, "learning_rate": 3.9112631578947365e-05, "loss": 1.5148, "step": 127 }, { "epoch": 0.07274793975561239, "grad_norm": 0.8872740268707275, "learning_rate": 3.857684210526316e-05, "loss": 1.556, "step": 128 }, { "epoch": 0.0733162830349531, "grad_norm": 0.8716956973075867, "learning_rate": 3.804105263157894e-05, "loss": 1.562, "step": 129 }, { "epoch": 0.07388462631429384, "grad_norm": 1.0244389772415161, "learning_rate": 3.7505263157894734e-05, "loss": 1.3268, "step": 130 }, { "epoch": 0.07445296959363455, "grad_norm": 0.9489229917526245, "learning_rate": 3.6969473684210526e-05, "loss": 1.5957, "step": 131 }, { "epoch": 0.07502131287297528, "grad_norm": 1.275113582611084, "learning_rate": 3.643368421052631e-05, "loss": 1.6578, "step": 132 }, { "epoch": 0.075589656152316, "grad_norm": 1.0766980648040771, "learning_rate": 3.5897894736842103e-05, "loss": 1.7832, "step": 133 }, { "epoch": 0.07615799943165671, "grad_norm": 1.1330690383911133, "learning_rate": 3.5362105263157895e-05, "loss": 1.2917, "step": 134 }, { "epoch": 0.07672634271099744, "grad_norm": 1.1914342641830444, "learning_rate": 3.482631578947368e-05, "loss": 1.6503, "step": 135 }, { "epoch": 0.07729468599033816, "grad_norm": 1.1140198707580566, "learning_rate": 3.429052631578947e-05, "loss": 1.5326, "step": 136 }, { "epoch": 0.07786302926967889, "grad_norm": 1.1322687864303589, "learning_rate": 3.375473684210526e-05, "loss": 1.5365, "step": 137 }, { "epoch": 0.0784313725490196, "grad_norm": 1.012017011642456, "learning_rate": 3.321894736842105e-05, "loss": 1.4869, "step": 138 }, { "epoch": 0.07899971582836032, "grad_norm": 1.404184103012085, "learning_rate": 3.268315789473684e-05, "loss": 1.3593, "step": 139 }, { "epoch": 0.07956805910770105, "grad_norm": 1.4041898250579834, "learning_rate": 3.2147368421052627e-05, "loss": 1.6974, "step": 140 }, { "epoch": 0.08013640238704177, "grad_norm": 1.6375192403793335, "learning_rate": 3.161157894736842e-05, "loss": 1.4095, "step": 141 }, { "epoch": 0.0807047456663825, "grad_norm": 1.7191766500473022, "learning_rate": 3.107578947368421e-05, "loss": 1.6655, "step": 142 }, { "epoch": 0.08127308894572322, "grad_norm": 1.5540755987167358, "learning_rate": 3.0539999999999996e-05, "loss": 1.6224, "step": 143 }, { "epoch": 0.08184143222506395, "grad_norm": 1.3428224325180054, "learning_rate": 3.0004210526315784e-05, "loss": 1.0397, "step": 144 }, { "epoch": 0.08240977550440466, "grad_norm": 1.6207056045532227, "learning_rate": 2.946842105263158e-05, "loss": 1.2121, "step": 145 }, { "epoch": 0.08297811878374538, "grad_norm": 1.900486946105957, "learning_rate": 2.8932631578947368e-05, "loss": 1.9168, "step": 146 }, { "epoch": 0.08354646206308611, "grad_norm": 1.903605341911316, "learning_rate": 2.8396842105263153e-05, "loss": 1.2044, "step": 147 }, { "epoch": 0.08411480534242682, "grad_norm": 2.0578088760375977, "learning_rate": 2.786105263157895e-05, "loss": 1.5255, "step": 148 }, { "epoch": 0.08468314862176755, "grad_norm": 2.299281358718872, "learning_rate": 2.7325263157894737e-05, "loss": 1.5899, "step": 149 }, { "epoch": 0.08525149190110827, "grad_norm": 3.6123387813568115, "learning_rate": 2.6789473684210522e-05, "loss": 1.6884, "step": 150 }, { "epoch": 0.08525149190110827, "eval_loss": 1.6460583209991455, "eval_runtime": 37.4635, "eval_samples_per_second": 19.779, "eval_steps_per_second": 4.965, "step": 150 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.105777533727539e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }