{
  "best_metric": 0.974936306476593,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.3714020427112349,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003714020427112349,
      "grad_norm": 1.807255744934082,
      "learning_rate": 1.6666666666666668e-07,
      "loss": 1.3609,
      "step": 1
    },
    {
      "epoch": 0.003714020427112349,
      "eval_loss": 3.732529640197754,
      "eval_runtime": 33.8777,
      "eval_samples_per_second": 13.401,
      "eval_steps_per_second": 1.683,
      "step": 1
    },
    {
      "epoch": 0.007428040854224698,
      "grad_norm": 3.0510687828063965,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 2.1135,
      "step": 2
    },
    {
      "epoch": 0.011142061281337047,
      "grad_norm": 3.5649664402008057,
      "learning_rate": 5.000000000000001e-07,
      "loss": 2.2581,
      "step": 3
    },
    {
      "epoch": 0.014856081708449397,
      "grad_norm": 3.335415840148926,
      "learning_rate": 6.666666666666667e-07,
      "loss": 2.13,
      "step": 4
    },
    {
      "epoch": 0.018570102135561744,
      "grad_norm": 4.153068542480469,
      "learning_rate": 8.333333333333333e-07,
      "loss": 2.5975,
      "step": 5
    },
    {
      "epoch": 0.022284122562674095,
      "grad_norm": 3.5235085487365723,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 2.4077,
      "step": 6
    },
    {
      "epoch": 0.025998142989786442,
      "grad_norm": 3.9450502395629883,
      "learning_rate": 1.1666666666666668e-06,
      "loss": 2.7531,
      "step": 7
    },
    {
      "epoch": 0.029712163416898793,
      "grad_norm": 4.102875232696533,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 2.7811,
      "step": 8
    },
    {
      "epoch": 0.033426183844011144,
      "grad_norm": 4.623213768005371,
      "learning_rate": 1.5e-06,
      "loss": 2.9448,
      "step": 9
    },
    {
      "epoch": 0.03714020427112349,
      "grad_norm": 4.344907760620117,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 2.8671,
      "step": 10
    },
    {
      "epoch": 0.04085422469823584,
      "grad_norm": 4.495368480682373,
      "learning_rate": 1.8333333333333333e-06,
      "loss": 3.0411,
      "step": 11
    },
    {
      "epoch": 0.04456824512534819,
      "grad_norm": 4.336016654968262,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 2.9452,
      "step": 12
    },
    {
      "epoch": 0.04828226555246054,
      "grad_norm": 4.537540435791016,
      "learning_rate": 2.166666666666667e-06,
      "loss": 3.0757,
      "step": 13
    },
    {
      "epoch": 0.051996285979572884,
      "grad_norm": 4.8718953132629395,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 3.119,
      "step": 14
    },
    {
      "epoch": 0.055710306406685235,
      "grad_norm": 4.760570049285889,
      "learning_rate": 2.5e-06,
      "loss": 3.0015,
      "step": 15
    },
    {
      "epoch": 0.059424326833797586,
      "grad_norm": 4.955251216888428,
      "learning_rate": 2.666666666666667e-06,
      "loss": 3.2608,
      "step": 16
    },
    {
      "epoch": 0.06313834726090993,
      "grad_norm": 4.898932456970215,
      "learning_rate": 2.8333333333333335e-06,
      "loss": 3.1958,
      "step": 17
    },
    {
      "epoch": 0.06685236768802229,
      "grad_norm": 4.579758644104004,
      "learning_rate": 3e-06,
      "loss": 2.9159,
      "step": 18
    },
    {
      "epoch": 0.07056638811513463,
      "grad_norm": 4.835488796234131,
      "learning_rate": 3.1666666666666667e-06,
      "loss": 3.0749,
      "step": 19
    },
    {
      "epoch": 0.07428040854224698,
      "grad_norm": 4.509119510650635,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 3.0182,
      "step": 20
    },
    {
      "epoch": 0.07799442896935933,
      "grad_norm": 4.78074312210083,
      "learning_rate": 3.5e-06,
      "loss": 3.3061,
      "step": 21
    },
    {
      "epoch": 0.08170844939647168,
      "grad_norm": 4.850345611572266,
      "learning_rate": 3.6666666666666666e-06,
      "loss": 3.4289,
      "step": 22
    },
    {
      "epoch": 0.08542246982358404,
      "grad_norm": 4.934477806091309,
      "learning_rate": 3.833333333333334e-06,
      "loss": 3.0821,
      "step": 23
    },
    {
      "epoch": 0.08913649025069638,
      "grad_norm": 4.942277908325195,
      "learning_rate": 4.000000000000001e-06,
      "loss": 3.1767,
      "step": 24
    },
    {
      "epoch": 0.09285051067780872,
      "grad_norm": 5.097426891326904,
      "learning_rate": 4.166666666666667e-06,
      "loss": 3.5164,
      "step": 25
    },
    {
      "epoch": 0.09656453110492108,
      "grad_norm": 4.779514312744141,
      "learning_rate": 4.333333333333334e-06,
      "loss": 3.1813,
      "step": 26
    },
    {
      "epoch": 0.10027855153203342,
      "grad_norm": 4.577581405639648,
      "learning_rate": 4.5e-06,
      "loss": 2.941,
      "step": 27
    },
    {
      "epoch": 0.10399257195914577,
      "grad_norm": 4.432027339935303,
      "learning_rate": 4.666666666666667e-06,
      "loss": 3.0727,
      "step": 28
    },
    {
      "epoch": 0.10770659238625813,
      "grad_norm": 4.62089729309082,
      "learning_rate": 4.833333333333333e-06,
      "loss": 3.1745,
      "step": 29
    },
    {
      "epoch": 0.11142061281337047,
      "grad_norm": 5.194108486175537,
      "learning_rate": 5e-06,
      "loss": 3.2936,
      "step": 30
    },
    {
      "epoch": 0.11513463324048283,
      "grad_norm": 4.469546794891357,
      "learning_rate": 4.997482666353287e-06,
      "loss": 2.911,
      "step": 31
    },
    {
      "epoch": 0.11884865366759517,
      "grad_norm": 4.538417339324951,
      "learning_rate": 4.989935734988098e-06,
      "loss": 3.1691,
      "step": 32
    },
    {
      "epoch": 0.12256267409470752,
      "grad_norm": 4.686911582946777,
      "learning_rate": 4.977374404419838e-06,
      "loss": 2.6434,
      "step": 33
    },
    {
      "epoch": 0.12627669452181986,
      "grad_norm": 4.282558441162109,
      "learning_rate": 4.959823971496575e-06,
      "loss": 2.6201,
      "step": 34
    },
    {
      "epoch": 0.12999071494893222,
      "grad_norm": 4.451693534851074,
      "learning_rate": 4.937319780454559e-06,
      "loss": 2.816,
      "step": 35
    },
    {
      "epoch": 0.13370473537604458,
      "grad_norm": 4.864418029785156,
      "learning_rate": 4.909907151739634e-06,
      "loss": 2.6204,
      "step": 36
    },
    {
      "epoch": 0.1374187558031569,
      "grad_norm": 4.817498207092285,
      "learning_rate": 4.8776412907378845e-06,
      "loss": 2.3239,
      "step": 37
    },
    {
      "epoch": 0.14113277623026926,
      "grad_norm": 4.677555084228516,
      "learning_rate": 4.8405871765993435e-06,
      "loss": 2.3435,
      "step": 38
    },
    {
      "epoch": 0.14484679665738162,
      "grad_norm": 4.646546363830566,
      "learning_rate": 4.7988194313786275e-06,
      "loss": 2.5372,
      "step": 39
    },
    {
      "epoch": 0.14856081708449395,
      "grad_norm": 4.66044807434082,
      "learning_rate": 4.752422169756048e-06,
      "loss": 2.5341,
      "step": 40
    },
    {
      "epoch": 0.1522748375116063,
      "grad_norm": 4.495102882385254,
      "learning_rate": 4.701488829641845e-06,
      "loss": 2.2117,
      "step": 41
    },
    {
      "epoch": 0.15598885793871867,
      "grad_norm": 4.308347702026367,
      "learning_rate": 4.646121984004666e-06,
      "loss": 1.9536,
      "step": 42
    },
    {
      "epoch": 0.15970287836583102,
      "grad_norm": 4.461782455444336,
      "learning_rate": 4.586433134303257e-06,
      "loss": 2.3499,
      "step": 43
    },
    {
      "epoch": 0.16341689879294335,
      "grad_norm": 4.448497295379639,
      "learning_rate": 4.522542485937369e-06,
      "loss": 1.6131,
      "step": 44
    },
    {
      "epoch": 0.1671309192200557,
      "grad_norm": 4.729413032531738,
      "learning_rate": 4.454578706170075e-06,
      "loss": 1.7194,
      "step": 45
    },
    {
      "epoch": 0.17084493964716807,
      "grad_norm": 4.8837995529174805,
      "learning_rate": 4.382678665009028e-06,
      "loss": 2.0311,
      "step": 46
    },
    {
      "epoch": 0.1745589600742804,
      "grad_norm": 5.577359199523926,
      "learning_rate": 4.3069871595684795e-06,
      "loss": 2.2559,
      "step": 47
    },
    {
      "epoch": 0.17827298050139276,
      "grad_norm": 6.341835021972656,
      "learning_rate": 4.227656622467162e-06,
      "loss": 1.9221,
      "step": 48
    },
    {
      "epoch": 0.18198700092850512,
      "grad_norm": 7.828035354614258,
      "learning_rate": 4.144846814849282e-06,
      "loss": 2.4759,
      "step": 49
    },
    {
      "epoch": 0.18570102135561745,
      "grad_norm": 8.434853553771973,
      "learning_rate": 4.058724504646834e-06,
      "loss": 3.2325,
      "step": 50
    },
    {
      "epoch": 0.18570102135561745,
      "eval_loss": 1.611772060394287,
      "eval_runtime": 33.8579,
      "eval_samples_per_second": 13.409,
      "eval_steps_per_second": 1.684,
      "step": 50
    },
    {
      "epoch": 0.1894150417827298,
      "grad_norm": 1.2142438888549805,
      "learning_rate": 3.969463130731183e-06,
      "loss": 0.8321,
      "step": 51
    },
    {
      "epoch": 0.19312906220984216,
      "grad_norm": 2.127845048904419,
      "learning_rate": 3.8772424536302565e-06,
      "loss": 1.1498,
      "step": 52
    },
    {
      "epoch": 0.1968430826369545,
      "grad_norm": 2.3324849605560303,
      "learning_rate": 3.782248193514766e-06,
      "loss": 1.2167,
      "step": 53
    },
    {
      "epoch": 0.20055710306406685,
      "grad_norm": 2.875474691390991,
      "learning_rate": 3.684671656182497e-06,
      "loss": 1.3003,
      "step": 54
    },
    {
      "epoch": 0.2042711234911792,
      "grad_norm": 2.1670145988464355,
      "learning_rate": 3.5847093477938955e-06,
      "loss": 1.3164,
      "step": 55
    },
    {
      "epoch": 0.20798514391829154,
      "grad_norm": 2.674121379852295,
      "learning_rate": 3.4825625791348093e-06,
      "loss": 1.2361,
      "step": 56
    },
    {
      "epoch": 0.2116991643454039,
      "grad_norm": 2.671863555908203,
      "learning_rate": 3.3784370602033572e-06,
      "loss": 1.2381,
      "step": 57
    },
    {
      "epoch": 0.21541318477251625,
      "grad_norm": 2.4426980018615723,
      "learning_rate": 3.272542485937369e-06,
      "loss": 1.3495,
      "step": 58
    },
    {
      "epoch": 0.2191272051996286,
      "grad_norm": 2.743527412414551,
      "learning_rate": 3.165092113916688e-06,
      "loss": 1.3194,
      "step": 59
    },
    {
      "epoch": 0.22284122562674094,
      "grad_norm": 2.485083818435669,
      "learning_rate": 3.056302334890786e-06,
      "loss": 1.3947,
      "step": 60
    },
    {
      "epoch": 0.2265552460538533,
      "grad_norm": 2.3887181282043457,
      "learning_rate": 2.946392236996592e-06,
      "loss": 1.2135,
      "step": 61
    },
    {
      "epoch": 0.23026926648096566,
      "grad_norm": 2.7468771934509277,
      "learning_rate": 2.835583164544139e-06,
      "loss": 1.2523,
      "step": 62
    },
    {
      "epoch": 0.233983286908078,
      "grad_norm": 2.5111334323883057,
      "learning_rate": 2.724098272258584e-06,
      "loss": 1.2471,
      "step": 63
    },
    {
      "epoch": 0.23769730733519034,
      "grad_norm": 2.4568049907684326,
      "learning_rate": 2.6121620758762877e-06,
      "loss": 1.1975,
      "step": 64
    },
    {
      "epoch": 0.2414113277623027,
      "grad_norm": 2.5363759994506836,
      "learning_rate": 2.5e-06,
      "loss": 1.1698,
      "step": 65
    },
    {
      "epoch": 0.24512534818941503,
      "grad_norm": 2.9774973392486572,
      "learning_rate": 2.3878379241237136e-06,
      "loss": 0.9025,
      "step": 66
    },
    {
      "epoch": 0.2488393686165274,
      "grad_norm": 3.5985605716705322,
      "learning_rate": 2.2759017277414165e-06,
      "loss": 1.22,
      "step": 67
    },
    {
      "epoch": 0.2525533890436397,
      "grad_norm": 2.659881830215454,
      "learning_rate": 2.1644168354558623e-06,
      "loss": 1.1345,
      "step": 68
    },
    {
      "epoch": 0.2562674094707521,
      "grad_norm": 2.756131172180176,
      "learning_rate": 2.053607763003409e-06,
      "loss": 1.1833,
      "step": 69
    },
    {
      "epoch": 0.25998142989786444,
      "grad_norm": 2.8494410514831543,
      "learning_rate": 1.9436976651092143e-06,
      "loss": 1.3538,
      "step": 70
    },
    {
      "epoch": 0.26369545032497677,
      "grad_norm": 2.196274518966675,
      "learning_rate": 1.8349078860833125e-06,
      "loss": 0.8695,
      "step": 71
    },
    {
      "epoch": 0.26740947075208915,
      "grad_norm": 2.421745538711548,
      "learning_rate": 1.7274575140626318e-06,
      "loss": 1.1275,
      "step": 72
    },
    {
      "epoch": 0.2711234911792015,
      "grad_norm": 3.1137969493865967,
      "learning_rate": 1.6215629397966432e-06,
      "loss": 1.2171,
      "step": 73
    },
    {
      "epoch": 0.2748375116063138,
      "grad_norm": 2.646501064300537,
      "learning_rate": 1.5174374208651913e-06,
      "loss": 1.234,
      "step": 74
    },
    {
      "epoch": 0.2785515320334262,
      "grad_norm": 2.5165648460388184,
      "learning_rate": 1.415290652206105e-06,
      "loss": 1.1426,
      "step": 75
    },
    {
      "epoch": 0.2822655524605385,
      "grad_norm": 2.521559476852417,
      "learning_rate": 1.3153283438175036e-06,
      "loss": 1.0068,
      "step": 76
    },
    {
      "epoch": 0.28597957288765086,
      "grad_norm": 2.283813238143921,
      "learning_rate": 1.217751806485235e-06,
      "loss": 1.1615,
      "step": 77
    },
    {
      "epoch": 0.28969359331476324,
      "grad_norm": 3.4305078983306885,
      "learning_rate": 1.122757546369744e-06,
      "loss": 1.1356,
      "step": 78
    },
    {
      "epoch": 0.2934076137418756,
      "grad_norm": 2.714026927947998,
      "learning_rate": 1.0305368692688175e-06,
      "loss": 1.086,
      "step": 79
    },
    {
      "epoch": 0.2971216341689879,
      "grad_norm": 2.8863515853881836,
      "learning_rate": 9.412754953531664e-07,
      "loss": 0.854,
      "step": 80
    },
    {
      "epoch": 0.3008356545961003,
      "grad_norm": 2.6695327758789062,
      "learning_rate": 8.551531851507186e-07,
      "loss": 1.1173,
      "step": 81
    },
    {
      "epoch": 0.3045496750232126,
      "grad_norm": 2.6120901107788086,
      "learning_rate": 7.723433775328385e-07,
      "loss": 0.9034,
      "step": 82
    },
    {
      "epoch": 0.308263695450325,
      "grad_norm": 2.606943368911743,
      "learning_rate": 6.930128404315214e-07,
      "loss": 1.0242,
      "step": 83
    },
    {
      "epoch": 0.31197771587743733,
      "grad_norm": 2.955063819885254,
      "learning_rate": 6.17321334990973e-07,
      "loss": 0.9499,
      "step": 84
    },
    {
      "epoch": 0.31569173630454966,
      "grad_norm": 2.4737842082977295,
      "learning_rate": 5.454212938299256e-07,
      "loss": 1.0119,
      "step": 85
    },
    {
      "epoch": 0.31940575673166205,
      "grad_norm": 3.265795946121216,
      "learning_rate": 4.774575140626317e-07,
      "loss": 0.855,
      "step": 86
    },
    {
      "epoch": 0.3231197771587744,
      "grad_norm": 3.479175090789795,
      "learning_rate": 4.1356686569674344e-07,
      "loss": 0.8147,
      "step": 87
    },
    {
      "epoch": 0.3268337975858867,
      "grad_norm": 3.1530840396881104,
      "learning_rate": 3.538780159953348e-07,
      "loss": 1.1186,
      "step": 88
    },
    {
      "epoch": 0.3305478180129991,
      "grad_norm": 4.578067779541016,
      "learning_rate": 2.98511170358155e-07,
      "loss": 0.9307,
      "step": 89
    },
    {
      "epoch": 0.3342618384401114,
      "grad_norm": 3.7903590202331543,
      "learning_rate": 2.4757783024395244e-07,
      "loss": 0.8859,
      "step": 90
    },
    {
      "epoch": 0.33797585886722376,
      "grad_norm": 4.344192028045654,
      "learning_rate": 2.0118056862137358e-07,
      "loss": 0.9719,
      "step": 91
    },
    {
      "epoch": 0.34168987929433614,
      "grad_norm": 6.286164283752441,
      "learning_rate": 1.59412823400657e-07,
      "loss": 1.1164,
      "step": 92
    },
    {
      "epoch": 0.34540389972144847,
      "grad_norm": 4.787858963012695,
      "learning_rate": 1.223587092621162e-07,
      "loss": 1.0283,
      "step": 93
    },
    {
      "epoch": 0.3491179201485608,
      "grad_norm": 7.135058403015137,
      "learning_rate": 9.00928482603669e-08,
      "loss": 0.9365,
      "step": 94
    },
    {
      "epoch": 0.3528319405756732,
      "grad_norm": 7.054929256439209,
      "learning_rate": 6.268021954544095e-08,
      "loss": 1.0428,
      "step": 95
    },
    {
      "epoch": 0.3565459610027855,
      "grad_norm": 8.841050148010254,
      "learning_rate": 4.017602850342584e-08,
      "loss": 0.9227,
      "step": 96
    },
    {
      "epoch": 0.36025998142989785,
      "grad_norm": 9.116321563720703,
      "learning_rate": 2.262559558016325e-08,
      "loss": 1.0278,
      "step": 97
    },
    {
      "epoch": 0.36397400185701023,
      "grad_norm": 8.069734573364258,
      "learning_rate": 1.006426501190233e-08,
      "loss": 1.2367,
      "step": 98
    },
    {
      "epoch": 0.36768802228412256,
      "grad_norm": 10.388524055480957,
      "learning_rate": 2.5173336467135266e-09,
      "loss": 1.0777,
      "step": 99
    },
    {
      "epoch": 0.3714020427112349,
      "grad_norm": 9.496840476989746,
      "learning_rate": 0.0,
      "loss": 1.8257,
      "step": 100
    },
    {
      "epoch": 0.3714020427112349,
      "eval_loss": 0.974936306476593,
      "eval_runtime": 33.8821,
      "eval_samples_per_second": 13.399,
      "eval_steps_per_second": 1.682,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.35249644224512e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}