{ "best_metric": 0.0005283160717226565, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.9287925696594427, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.006191950464396285, "grad_norm": 12.259547233581543, "learning_rate": 1.0100000000000002e-05, "loss": 0.7539, "step": 1 }, { "epoch": 0.006191950464396285, "eval_loss": 1.197643518447876, "eval_runtime": 17.2464, "eval_samples_per_second": 126.172, "eval_steps_per_second": 3.943, "step": 1 }, { "epoch": 0.01238390092879257, "grad_norm": 35.96168899536133, "learning_rate": 2.0200000000000003e-05, "loss": 2.2829, "step": 2 }, { "epoch": 0.018575851393188854, "grad_norm": 43.1805305480957, "learning_rate": 3.0299999999999998e-05, "loss": 2.6079, "step": 3 }, { "epoch": 0.02476780185758514, "grad_norm": 36.352195739746094, "learning_rate": 4.0400000000000006e-05, "loss": 2.6617, "step": 4 }, { "epoch": 0.030959752321981424, "grad_norm": 24.298112869262695, "learning_rate": 5.05e-05, "loss": 1.9823, "step": 5 }, { "epoch": 0.03715170278637771, "grad_norm": 27.27290916442871, "learning_rate": 6.0599999999999996e-05, "loss": 2.0069, "step": 6 }, { "epoch": 0.043343653250773995, "grad_norm": 21.428409576416016, "learning_rate": 7.07e-05, "loss": 0.774, "step": 7 }, { "epoch": 0.04953560371517028, "grad_norm": 2.467296838760376, "learning_rate": 8.080000000000001e-05, "loss": 0.1882, "step": 8 }, { "epoch": 0.05572755417956656, "grad_norm": 2.2677502632141113, "learning_rate": 9.09e-05, "loss": 0.1176, "step": 9 }, { "epoch": 0.06191950464396285, "grad_norm": 18.355634689331055, "learning_rate": 0.000101, "loss": 0.1497, "step": 10 }, { "epoch": 0.06811145510835913, "grad_norm": 68.11518096923828, "learning_rate": 0.00010046842105263158, "loss": 1.0334, "step": 11 }, { "epoch": 0.07430340557275542, "grad_norm": 6.451508522033691, "learning_rate": 9.993684210526315e-05, "loss": 0.1085, "step": 12 }, { "epoch": 0.0804953560371517, "grad_norm": 1.9255141019821167, "learning_rate": 9.940526315789473e-05, "loss": 0.051, "step": 13 }, { "epoch": 0.08668730650154799, "grad_norm": 1.1394455432891846, "learning_rate": 9.887368421052632e-05, "loss": 0.0226, "step": 14 }, { "epoch": 0.09287925696594428, "grad_norm": 2.088902711868286, "learning_rate": 9.83421052631579e-05, "loss": 0.0207, "step": 15 }, { "epoch": 0.09907120743034056, "grad_norm": 4.613748073577881, "learning_rate": 9.781052631578948e-05, "loss": 0.0346, "step": 16 }, { "epoch": 0.10526315789473684, "grad_norm": 0.20414972305297852, "learning_rate": 9.727894736842106e-05, "loss": 0.0031, "step": 17 }, { "epoch": 0.11145510835913312, "grad_norm": 0.71966952085495, "learning_rate": 9.674736842105263e-05, "loss": 0.0053, "step": 18 }, { "epoch": 0.11764705882352941, "grad_norm": 1.1517033576965332, "learning_rate": 9.621578947368421e-05, "loss": 0.0203, "step": 19 }, { "epoch": 0.1238390092879257, "grad_norm": 3.582458972930908, "learning_rate": 9.568421052631578e-05, "loss": 0.0762, "step": 20 }, { "epoch": 0.13003095975232198, "grad_norm": 0.7943978905677795, "learning_rate": 9.515263157894737e-05, "loss": 0.0224, "step": 21 }, { "epoch": 0.13622291021671826, "grad_norm": 1.3800811767578125, "learning_rate": 9.462105263157895e-05, "loss": 0.0193, "step": 22 }, { "epoch": 0.14241486068111456, "grad_norm": 4.360982418060303, "learning_rate": 9.408947368421054e-05, "loss": 0.0334, "step": 23 }, { "epoch": 0.14860681114551083, "grad_norm": 0.9100430011749268, "learning_rate": 9.355789473684211e-05, "loss": 0.0058, "step": 24 }, { "epoch": 0.15479876160990713, "grad_norm": 4.23191499710083, "learning_rate": 9.302631578947369e-05, "loss": 0.028, "step": 25 }, { "epoch": 0.1609907120743034, "grad_norm": 2.2955400943756104, "learning_rate": 9.249473684210526e-05, "loss": 0.074, "step": 26 }, { "epoch": 0.16718266253869968, "grad_norm": 0.4862731397151947, "learning_rate": 9.196315789473685e-05, "loss": 0.0099, "step": 27 }, { "epoch": 0.17337461300309598, "grad_norm": 0.5931727886199951, "learning_rate": 9.143157894736843e-05, "loss": 0.0153, "step": 28 }, { "epoch": 0.17956656346749225, "grad_norm": 0.7942826151847839, "learning_rate": 9.09e-05, "loss": 0.0082, "step": 29 }, { "epoch": 0.18575851393188855, "grad_norm": 0.08755509555339813, "learning_rate": 9.036842105263158e-05, "loss": 0.0015, "step": 30 }, { "epoch": 0.19195046439628483, "grad_norm": 0.4156116545200348, "learning_rate": 8.983684210526316e-05, "loss": 0.0022, "step": 31 }, { "epoch": 0.19814241486068113, "grad_norm": 0.5781251788139343, "learning_rate": 8.930526315789474e-05, "loss": 0.0155, "step": 32 }, { "epoch": 0.2043343653250774, "grad_norm": 0.7526621222496033, "learning_rate": 8.877368421052632e-05, "loss": 0.0127, "step": 33 }, { "epoch": 0.21052631578947367, "grad_norm": 0.3467714786529541, "learning_rate": 8.82421052631579e-05, "loss": 0.0066, "step": 34 }, { "epoch": 0.21671826625386997, "grad_norm": 4.240686893463135, "learning_rate": 8.771052631578948e-05, "loss": 0.0301, "step": 35 }, { "epoch": 0.22291021671826625, "grad_norm": 0.24843335151672363, "learning_rate": 8.717894736842105e-05, "loss": 0.0011, "step": 36 }, { "epoch": 0.22910216718266255, "grad_norm": 1.6270495653152466, "learning_rate": 8.664736842105263e-05, "loss": 0.004, "step": 37 }, { "epoch": 0.23529411764705882, "grad_norm": 0.1393214613199234, "learning_rate": 8.61157894736842e-05, "loss": 0.0031, "step": 38 }, { "epoch": 0.24148606811145512, "grad_norm": 0.6503007411956787, "learning_rate": 8.55842105263158e-05, "loss": 0.006, "step": 39 }, { "epoch": 0.2476780185758514, "grad_norm": 3.4012598991394043, "learning_rate": 8.505263157894737e-05, "loss": 0.0159, "step": 40 }, { "epoch": 0.25386996904024767, "grad_norm": 1.4423998594284058, "learning_rate": 8.452105263157896e-05, "loss": 0.0132, "step": 41 }, { "epoch": 0.26006191950464397, "grad_norm": 0.7524486780166626, "learning_rate": 8.398947368421053e-05, "loss": 0.032, "step": 42 }, { "epoch": 0.26625386996904027, "grad_norm": 1.704451084136963, "learning_rate": 8.345789473684211e-05, "loss": 0.0119, "step": 43 }, { "epoch": 0.2724458204334365, "grad_norm": 0.21985603868961334, "learning_rate": 8.292631578947368e-05, "loss": 0.0033, "step": 44 }, { "epoch": 0.2786377708978328, "grad_norm": 1.0363315343856812, "learning_rate": 8.239473684210526e-05, "loss": 0.0076, "step": 45 }, { "epoch": 0.2848297213622291, "grad_norm": 0.25210151076316833, "learning_rate": 8.186315789473683e-05, "loss": 0.0031, "step": 46 }, { "epoch": 0.29102167182662536, "grad_norm": 0.6952300071716309, "learning_rate": 8.133157894736842e-05, "loss": 0.0122, "step": 47 }, { "epoch": 0.29721362229102166, "grad_norm": 0.16958408057689667, "learning_rate": 8.080000000000001e-05, "loss": 0.002, "step": 48 }, { "epoch": 0.30340557275541796, "grad_norm": 0.15895675122737885, "learning_rate": 8.026842105263159e-05, "loss": 0.001, "step": 49 }, { "epoch": 0.30959752321981426, "grad_norm": 0.29221901297569275, "learning_rate": 7.973684210526316e-05, "loss": 0.0018, "step": 50 }, { "epoch": 0.30959752321981426, "eval_loss": 0.0009313385817222297, "eval_runtime": 17.578, "eval_samples_per_second": 123.791, "eval_steps_per_second": 3.868, "step": 50 }, { "epoch": 0.3157894736842105, "grad_norm": 0.25268837809562683, "learning_rate": 7.920526315789474e-05, "loss": 0.0083, "step": 51 }, { "epoch": 0.3219814241486068, "grad_norm": 0.6461657881736755, "learning_rate": 7.867368421052631e-05, "loss": 0.0053, "step": 52 }, { "epoch": 0.3281733746130031, "grad_norm": 0.09922587871551514, "learning_rate": 7.814210526315789e-05, "loss": 0.001, "step": 53 }, { "epoch": 0.33436532507739936, "grad_norm": 0.09927251189947128, "learning_rate": 7.761052631578946e-05, "loss": 0.0012, "step": 54 }, { "epoch": 0.34055727554179566, "grad_norm": 0.11263656616210938, "learning_rate": 7.707894736842105e-05, "loss": 0.0005, "step": 55 }, { "epoch": 0.34674922600619196, "grad_norm": 0.35559403896331787, "learning_rate": 7.654736842105264e-05, "loss": 0.0021, "step": 56 }, { "epoch": 0.35294117647058826, "grad_norm": 0.13606180250644684, "learning_rate": 7.601578947368422e-05, "loss": 0.0042, "step": 57 }, { "epoch": 0.3591331269349845, "grad_norm": 0.15378090739250183, "learning_rate": 7.548421052631579e-05, "loss": 0.0014, "step": 58 }, { "epoch": 0.3653250773993808, "grad_norm": 0.009891056455671787, "learning_rate": 7.495263157894737e-05, "loss": 0.0002, "step": 59 }, { "epoch": 0.3715170278637771, "grad_norm": 0.0574217215180397, "learning_rate": 7.442105263157894e-05, "loss": 0.0002, "step": 60 }, { "epoch": 0.37770897832817335, "grad_norm": 0.005942782387137413, "learning_rate": 7.388947368421053e-05, "loss": 0.0001, "step": 61 }, { "epoch": 0.38390092879256965, "grad_norm": 0.5954540371894836, "learning_rate": 7.335789473684211e-05, "loss": 0.001, "step": 62 }, { "epoch": 0.39009287925696595, "grad_norm": 3.4444451332092285, "learning_rate": 7.282631578947368e-05, "loss": 0.0055, "step": 63 }, { "epoch": 0.39628482972136225, "grad_norm": 0.14950387179851532, "learning_rate": 7.229473684210527e-05, "loss": 0.0032, "step": 64 }, { "epoch": 0.4024767801857585, "grad_norm": 0.1800544261932373, "learning_rate": 7.176315789473685e-05, "loss": 0.0005, "step": 65 }, { "epoch": 0.4086687306501548, "grad_norm": 0.1437232792377472, "learning_rate": 7.123157894736842e-05, "loss": 0.0002, "step": 66 }, { "epoch": 0.4148606811145511, "grad_norm": 0.018890729174017906, "learning_rate": 7.07e-05, "loss": 0.0002, "step": 67 }, { "epoch": 0.42105263157894735, "grad_norm": 1.0753740072250366, "learning_rate": 7.016842105263159e-05, "loss": 0.0064, "step": 68 }, { "epoch": 0.42724458204334365, "grad_norm": 0.21867413818836212, "learning_rate": 6.963684210526316e-05, "loss": 0.002, "step": 69 }, { "epoch": 0.43343653250773995, "grad_norm": 0.22536171972751617, "learning_rate": 6.910526315789474e-05, "loss": 0.0034, "step": 70 }, { "epoch": 0.43962848297213625, "grad_norm": 0.6939528584480286, "learning_rate": 6.857368421052631e-05, "loss": 0.0062, "step": 71 }, { "epoch": 0.4458204334365325, "grad_norm": 0.3450081944465637, "learning_rate": 6.80421052631579e-05, "loss": 0.0036, "step": 72 }, { "epoch": 0.4520123839009288, "grad_norm": 0.0263675469905138, "learning_rate": 6.751052631578948e-05, "loss": 0.0001, "step": 73 }, { "epoch": 0.4582043343653251, "grad_norm": 0.017043374478816986, "learning_rate": 6.697894736842105e-05, "loss": 0.0001, "step": 74 }, { "epoch": 0.46439628482972134, "grad_norm": 1.584336757659912, "learning_rate": 6.644736842105264e-05, "loss": 0.013, "step": 75 }, { "epoch": 0.47058823529411764, "grad_norm": 0.08854586631059647, "learning_rate": 6.591578947368422e-05, "loss": 0.003, "step": 76 }, { "epoch": 0.47678018575851394, "grad_norm": 0.17147338390350342, "learning_rate": 6.538421052631579e-05, "loss": 0.0029, "step": 77 }, { "epoch": 0.48297213622291024, "grad_norm": 0.4910271167755127, "learning_rate": 6.485263157894737e-05, "loss": 0.0056, "step": 78 }, { "epoch": 0.4891640866873065, "grad_norm": 0.06682564318180084, "learning_rate": 6.432105263157894e-05, "loss": 0.0004, "step": 79 }, { "epoch": 0.4953560371517028, "grad_norm": 0.26821601390838623, "learning_rate": 6.378947368421053e-05, "loss": 0.0033, "step": 80 }, { "epoch": 0.5015479876160991, "grad_norm": 0.16297802329063416, "learning_rate": 6.32578947368421e-05, "loss": 0.0008, "step": 81 }, { "epoch": 0.5077399380804953, "grad_norm": 1.3748586177825928, "learning_rate": 6.27263157894737e-05, "loss": 0.0087, "step": 82 }, { "epoch": 0.5139318885448917, "grad_norm": 0.1394932121038437, "learning_rate": 6.219473684210527e-05, "loss": 0.0006, "step": 83 }, { "epoch": 0.5201238390092879, "grad_norm": 0.16295793652534485, "learning_rate": 6.166315789473685e-05, "loss": 0.0024, "step": 84 }, { "epoch": 0.5263157894736842, "grad_norm": 0.0500481054186821, "learning_rate": 6.113157894736842e-05, "loss": 0.0005, "step": 85 }, { "epoch": 0.5325077399380805, "grad_norm": 0.025755494832992554, "learning_rate": 6.0599999999999996e-05, "loss": 0.0002, "step": 86 }, { "epoch": 0.5386996904024768, "grad_norm": 0.9285590648651123, "learning_rate": 6.006842105263158e-05, "loss": 0.0006, "step": 87 }, { "epoch": 0.544891640866873, "grad_norm": 0.10077816247940063, "learning_rate": 5.953684210526315e-05, "loss": 0.0026, "step": 88 }, { "epoch": 0.5510835913312694, "grad_norm": 0.3003288805484772, "learning_rate": 5.900526315789474e-05, "loss": 0.0043, "step": 89 }, { "epoch": 0.5572755417956656, "grad_norm": 0.5609342455863953, "learning_rate": 5.847368421052632e-05, "loss": 0.0012, "step": 90 }, { "epoch": 0.5634674922600619, "grad_norm": 0.024184679612517357, "learning_rate": 5.79421052631579e-05, "loss": 0.0003, "step": 91 }, { "epoch": 0.5696594427244582, "grad_norm": 0.010705746710300446, "learning_rate": 5.7410526315789475e-05, "loss": 0.0002, "step": 92 }, { "epoch": 0.5758513931888545, "grad_norm": 0.07954201847314835, "learning_rate": 5.687894736842105e-05, "loss": 0.0005, "step": 93 }, { "epoch": 0.5820433436532507, "grad_norm": 0.0802941545844078, "learning_rate": 5.6347368421052625e-05, "loss": 0.0018, "step": 94 }, { "epoch": 0.5882352941176471, "grad_norm": 0.07533490657806396, "learning_rate": 5.5815789473684214e-05, "loss": 0.0009, "step": 95 }, { "epoch": 0.5944272445820433, "grad_norm": 0.2894861698150635, "learning_rate": 5.5284210526315796e-05, "loss": 0.0048, "step": 96 }, { "epoch": 0.6006191950464397, "grad_norm": 0.069795623421669, "learning_rate": 5.475263157894737e-05, "loss": 0.0007, "step": 97 }, { "epoch": 0.6068111455108359, "grad_norm": 0.06586623191833496, "learning_rate": 5.422105263157895e-05, "loss": 0.0007, "step": 98 }, { "epoch": 0.6130030959752322, "grad_norm": 0.0338183231651783, "learning_rate": 5.368947368421053e-05, "loss": 0.0003, "step": 99 }, { "epoch": 0.6191950464396285, "grad_norm": 0.019460417330265045, "learning_rate": 5.3157894736842104e-05, "loss": 0.0004, "step": 100 }, { "epoch": 0.6191950464396285, "eval_loss": 0.0008776471950113773, "eval_runtime": 17.382, "eval_samples_per_second": 125.187, "eval_steps_per_second": 3.912, "step": 100 }, { "epoch": 0.6253869969040248, "grad_norm": 0.08889586478471756, "learning_rate": 5.262631578947368e-05, "loss": 0.0021, "step": 101 }, { "epoch": 0.631578947368421, "grad_norm": 0.02110682614147663, "learning_rate": 5.209473684210527e-05, "loss": 0.0003, "step": 102 }, { "epoch": 0.6377708978328174, "grad_norm": 0.08070322871208191, "learning_rate": 5.1563157894736844e-05, "loss": 0.0004, "step": 103 }, { "epoch": 0.6439628482972136, "grad_norm": 0.10896036028862, "learning_rate": 5.1031578947368426e-05, "loss": 0.0009, "step": 104 }, { "epoch": 0.6501547987616099, "grad_norm": 0.06696660816669464, "learning_rate": 5.05e-05, "loss": 0.0006, "step": 105 }, { "epoch": 0.6563467492260062, "grad_norm": 0.24101363122463226, "learning_rate": 4.9968421052631576e-05, "loss": 0.0014, "step": 106 }, { "epoch": 0.6625386996904025, "grad_norm": 0.1499151736497879, "learning_rate": 4.943684210526316e-05, "loss": 0.008, "step": 107 }, { "epoch": 0.6687306501547987, "grad_norm": 0.37049758434295654, "learning_rate": 4.890526315789474e-05, "loss": 0.0029, "step": 108 }, { "epoch": 0.6749226006191951, "grad_norm": 0.04260842874646187, "learning_rate": 4.8373684210526316e-05, "loss": 0.0003, "step": 109 }, { "epoch": 0.6811145510835913, "grad_norm": 0.009202769957482815, "learning_rate": 4.784210526315789e-05, "loss": 0.0001, "step": 110 }, { "epoch": 0.6873065015479877, "grad_norm": 0.03566950932145119, "learning_rate": 4.731052631578947e-05, "loss": 0.0002, "step": 111 }, { "epoch": 0.6934984520123839, "grad_norm": 0.15307831764221191, "learning_rate": 4.6778947368421055e-05, "loss": 0.0004, "step": 112 }, { "epoch": 0.6996904024767802, "grad_norm": 0.059254102408885956, "learning_rate": 4.624736842105263e-05, "loss": 0.0011, "step": 113 }, { "epoch": 0.7058823529411765, "grad_norm": 0.02420051209628582, "learning_rate": 4.571578947368421e-05, "loss": 0.0003, "step": 114 }, { "epoch": 0.7120743034055728, "grad_norm": 0.14050903916358948, "learning_rate": 4.518421052631579e-05, "loss": 0.0072, "step": 115 }, { "epoch": 0.718266253869969, "grad_norm": 0.004327650181949139, "learning_rate": 4.465263157894737e-05, "loss": 0.0001, "step": 116 }, { "epoch": 0.7244582043343654, "grad_norm": 0.002650818321853876, "learning_rate": 4.412105263157895e-05, "loss": 0.0001, "step": 117 }, { "epoch": 0.7306501547987616, "grad_norm": 0.0020040925592184067, "learning_rate": 4.358947368421053e-05, "loss": 0.0001, "step": 118 }, { "epoch": 0.7368421052631579, "grad_norm": 0.019702734425663948, "learning_rate": 4.30578947368421e-05, "loss": 0.0008, "step": 119 }, { "epoch": 0.7430340557275542, "grad_norm": 0.026694906875491142, "learning_rate": 4.2526315789473685e-05, "loss": 0.0004, "step": 120 }, { "epoch": 0.7492260061919505, "grad_norm": 0.08692020922899246, "learning_rate": 4.199473684210527e-05, "loss": 0.0007, "step": 121 }, { "epoch": 0.7554179566563467, "grad_norm": 0.005789334420114756, "learning_rate": 4.146315789473684e-05, "loss": 0.0001, "step": 122 }, { "epoch": 0.7616099071207431, "grad_norm": 0.6568689942359924, "learning_rate": 4.093157894736842e-05, "loss": 0.0008, "step": 123 }, { "epoch": 0.7678018575851393, "grad_norm": 0.017778966575860977, "learning_rate": 4.0400000000000006e-05, "loss": 0.0001, "step": 124 }, { "epoch": 0.7739938080495357, "grad_norm": 0.4473741054534912, "learning_rate": 3.986842105263158e-05, "loss": 0.0008, "step": 125 }, { "epoch": 0.7801857585139319, "grad_norm": 0.028590694069862366, "learning_rate": 3.933684210526316e-05, "loss": 0.001, "step": 126 }, { "epoch": 0.7863777089783281, "grad_norm": 0.07842560857534409, "learning_rate": 3.880526315789473e-05, "loss": 0.0025, "step": 127 }, { "epoch": 0.7925696594427245, "grad_norm": 0.11984878033399582, "learning_rate": 3.827368421052632e-05, "loss": 0.001, "step": 128 }, { "epoch": 0.7987616099071208, "grad_norm": 0.21180228888988495, "learning_rate": 3.7742105263157896e-05, "loss": 0.0004, "step": 129 }, { "epoch": 0.804953560371517, "grad_norm": 0.24939629435539246, "learning_rate": 3.721052631578947e-05, "loss": 0.0011, "step": 130 }, { "epoch": 0.8111455108359134, "grad_norm": 0.32767805457115173, "learning_rate": 3.6678947368421054e-05, "loss": 0.0016, "step": 131 }, { "epoch": 0.8173374613003096, "grad_norm": 0.08400473743677139, "learning_rate": 3.6147368421052636e-05, "loss": 0.002, "step": 132 }, { "epoch": 0.8235294117647058, "grad_norm": 0.10696029663085938, "learning_rate": 3.561578947368421e-05, "loss": 0.0009, "step": 133 }, { "epoch": 0.8297213622291022, "grad_norm": 0.010906717739999294, "learning_rate": 3.508421052631579e-05, "loss": 0.0002, "step": 134 }, { "epoch": 0.8359133126934984, "grad_norm": 0.049126312136650085, "learning_rate": 3.455263157894737e-05, "loss": 0.0003, "step": 135 }, { "epoch": 0.8421052631578947, "grad_norm": 0.6138126254081726, "learning_rate": 3.402105263157895e-05, "loss": 0.0021, "step": 136 }, { "epoch": 0.848297213622291, "grad_norm": 0.37126851081848145, "learning_rate": 3.3489473684210526e-05, "loss": 0.0013, "step": 137 }, { "epoch": 0.8544891640866873, "grad_norm": 0.04636126756668091, "learning_rate": 3.295789473684211e-05, "loss": 0.003, "step": 138 }, { "epoch": 0.8606811145510835, "grad_norm": 0.038804881274700165, "learning_rate": 3.242631578947368e-05, "loss": 0.0005, "step": 139 }, { "epoch": 0.8668730650154799, "grad_norm": 0.031973619014024734, "learning_rate": 3.1894736842105265e-05, "loss": 0.0003, "step": 140 }, { "epoch": 0.8730650154798761, "grad_norm": 0.14747843146324158, "learning_rate": 3.136315789473685e-05, "loss": 0.0005, "step": 141 }, { "epoch": 0.8792569659442725, "grad_norm": 0.51484614610672, "learning_rate": 3.083157894736842e-05, "loss": 0.0004, "step": 142 }, { "epoch": 0.8854489164086687, "grad_norm": 0.43561312556266785, "learning_rate": 3.0299999999999998e-05, "loss": 0.0055, "step": 143 }, { "epoch": 0.891640866873065, "grad_norm": 0.011192581616342068, "learning_rate": 2.9768421052631577e-05, "loss": 0.0005, "step": 144 }, { "epoch": 0.8978328173374613, "grad_norm": 0.2694231867790222, "learning_rate": 2.923684210526316e-05, "loss": 0.0017, "step": 145 }, { "epoch": 0.9040247678018576, "grad_norm": 0.014473036862909794, "learning_rate": 2.8705263157894737e-05, "loss": 0.0002, "step": 146 }, { "epoch": 0.9102167182662538, "grad_norm": 0.013481522910296917, "learning_rate": 2.8173684210526313e-05, "loss": 0.0001, "step": 147 }, { "epoch": 0.9164086687306502, "grad_norm": 0.029815969988703728, "learning_rate": 2.7642105263157898e-05, "loss": 0.0002, "step": 148 }, { "epoch": 0.9226006191950464, "grad_norm": 0.021435940638184547, "learning_rate": 2.7110526315789473e-05, "loss": 0.0002, "step": 149 }, { "epoch": 0.9287925696594427, "grad_norm": 0.010874868370592594, "learning_rate": 2.6578947368421052e-05, "loss": 0.0002, "step": 150 }, { "epoch": 0.9287925696594427, "eval_loss": 0.0005283160717226565, "eval_runtime": 17.5726, "eval_samples_per_second": 123.829, "eval_steps_per_second": 3.87, "step": 150 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.7966786950169887e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }