{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.010662614873014922,
  "eval_steps": 25,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00010662614873014921,
      "grad_norm": 0.09556058794260025,
      "learning_rate": 1e-05,
      "loss": 10.8347,
      "step": 1
    },
    {
      "epoch": 0.00010662614873014921,
      "eval_loss": 10.834606170654297,
      "eval_runtime": 38.6886,
      "eval_samples_per_second": 408.285,
      "eval_steps_per_second": 204.143,
      "step": 1
    },
    {
      "epoch": 0.00021325229746029843,
      "grad_norm": 0.09524369239807129,
      "learning_rate": 2e-05,
      "loss": 10.8334,
      "step": 2
    },
    {
      "epoch": 0.00031987844619044766,
      "grad_norm": 0.10252054035663605,
      "learning_rate": 3e-05,
      "loss": 10.8366,
      "step": 3
    },
    {
      "epoch": 0.00042650459492059686,
      "grad_norm": 0.09081214666366577,
      "learning_rate": 4e-05,
      "loss": 10.8363,
      "step": 4
    },
    {
      "epoch": 0.0005331307436507461,
      "grad_norm": 0.0959392711520195,
      "learning_rate": 5e-05,
      "loss": 10.8339,
      "step": 5
    },
    {
      "epoch": 0.0006397568923808953,
      "grad_norm": 0.08222699910402298,
      "learning_rate": 6e-05,
      "loss": 10.831,
      "step": 6
    },
    {
      "epoch": 0.0007463830411110445,
      "grad_norm": 0.08778945356607437,
      "learning_rate": 7e-05,
      "loss": 10.8336,
      "step": 7
    },
    {
      "epoch": 0.0008530091898411937,
      "grad_norm": 0.08124235272407532,
      "learning_rate": 8e-05,
      "loss": 10.8334,
      "step": 8
    },
    {
      "epoch": 0.0009596353385713429,
      "grad_norm": 0.09498875588178635,
      "learning_rate": 9e-05,
      "loss": 10.8362,
      "step": 9
    },
    {
      "epoch": 0.0010662614873014921,
      "grad_norm": 0.10160056501626968,
      "learning_rate": 0.0001,
      "loss": 10.8318,
      "step": 10
    },
    {
      "epoch": 0.0011728876360316413,
      "grad_norm": 0.09773286432027817,
      "learning_rate": 9.99695413509548e-05,
      "loss": 10.8302,
      "step": 11
    },
    {
      "epoch": 0.0012795137847617906,
      "grad_norm": 0.09210336208343506,
      "learning_rate": 9.987820251299122e-05,
      "loss": 10.8307,
      "step": 12
    },
    {
      "epoch": 0.0013861399334919398,
      "grad_norm": 0.10203942656517029,
      "learning_rate": 9.972609476841367e-05,
      "loss": 10.8322,
      "step": 13
    },
    {
      "epoch": 0.001492766082222089,
      "grad_norm": 0.09077027440071106,
      "learning_rate": 9.951340343707852e-05,
      "loss": 10.8305,
      "step": 14
    },
    {
      "epoch": 0.001599392230952238,
      "grad_norm": 0.09403304755687714,
      "learning_rate": 9.924038765061042e-05,
      "loss": 10.8301,
      "step": 15
    },
    {
      "epoch": 0.0017060183796823874,
      "grad_norm": 0.10645323991775513,
      "learning_rate": 9.890738003669029e-05,
      "loss": 10.8306,
      "step": 16
    },
    {
      "epoch": 0.0018126445284125366,
      "grad_norm": 0.10251530259847641,
      "learning_rate": 9.851478631379982e-05,
      "loss": 10.8273,
      "step": 17
    },
    {
      "epoch": 0.0019192706771426857,
      "grad_norm": 0.10220959782600403,
      "learning_rate": 9.806308479691595e-05,
      "loss": 10.8284,
      "step": 18
    },
    {
      "epoch": 0.002025896825872835,
      "grad_norm": 0.09652484208345413,
      "learning_rate": 9.755282581475769e-05,
      "loss": 10.8268,
      "step": 19
    },
    {
      "epoch": 0.0021325229746029842,
      "grad_norm": 0.1007598266005516,
      "learning_rate": 9.698463103929542e-05,
      "loss": 10.8307,
      "step": 20
    },
    {
      "epoch": 0.0022391491233331334,
      "grad_norm": 0.10961519926786423,
      "learning_rate": 9.635919272833938e-05,
      "loss": 10.8255,
      "step": 21
    },
    {
      "epoch": 0.0023457752720632825,
      "grad_norm": 0.10666362196207047,
      "learning_rate": 9.567727288213005e-05,
      "loss": 10.8288,
      "step": 22
    },
    {
      "epoch": 0.0024524014207934317,
      "grad_norm": 0.12087945640087128,
      "learning_rate": 9.493970231495835e-05,
      "loss": 10.8274,
      "step": 23
    },
    {
      "epoch": 0.0025590275695235812,
      "grad_norm": 0.10848045349121094,
      "learning_rate": 9.414737964294636e-05,
      "loss": 10.8294,
      "step": 24
    },
    {
      "epoch": 0.0026656537182537304,
      "grad_norm": 0.0989118367433548,
      "learning_rate": 9.330127018922194e-05,
      "loss": 10.8252,
      "step": 25
    },
    {
      "epoch": 0.0026656537182537304,
      "eval_loss": 10.827045440673828,
      "eval_runtime": 27.3777,
      "eval_samples_per_second": 576.966,
      "eval_steps_per_second": 288.483,
      "step": 25
    },
    {
      "epoch": 0.0027722798669838795,
      "grad_norm": 0.1026577427983284,
      "learning_rate": 9.24024048078213e-05,
      "loss": 10.8233,
      "step": 26
    },
    {
      "epoch": 0.0028789060157140287,
      "grad_norm": 0.10963460057973862,
      "learning_rate": 9.145187862775209e-05,
      "loss": 10.8291,
      "step": 27
    },
    {
      "epoch": 0.002985532164444178,
      "grad_norm": 0.1128157526254654,
      "learning_rate": 9.045084971874738e-05,
      "loss": 10.8313,
      "step": 28
    },
    {
      "epoch": 0.003092158313174327,
      "grad_norm": 0.11681310087442398,
      "learning_rate": 8.940053768033609e-05,
      "loss": 10.8269,
      "step": 29
    },
    {
      "epoch": 0.003198784461904476,
      "grad_norm": 0.10859231650829315,
      "learning_rate": 8.83022221559489e-05,
      "loss": 10.8291,
      "step": 30
    },
    {
      "epoch": 0.0033054106106346257,
      "grad_norm": 0.12530773878097534,
      "learning_rate": 8.715724127386972e-05,
      "loss": 10.8255,
      "step": 31
    },
    {
      "epoch": 0.003412036759364775,
      "grad_norm": 0.1253245323896408,
      "learning_rate": 8.596699001693255e-05,
      "loss": 10.8218,
      "step": 32
    },
    {
      "epoch": 0.003518662908094924,
      "grad_norm": 0.1236739233136177,
      "learning_rate": 8.473291852294987e-05,
      "loss": 10.8226,
      "step": 33
    },
    {
      "epoch": 0.003625289056825073,
      "grad_norm": 0.13345038890838623,
      "learning_rate": 8.345653031794292e-05,
      "loss": 10.8218,
      "step": 34
    },
    {
      "epoch": 0.0037319152055552223,
      "grad_norm": 0.13090018928050995,
      "learning_rate": 8.213938048432697e-05,
      "loss": 10.8221,
      "step": 35
    },
    {
      "epoch": 0.0038385413542853714,
      "grad_norm": 0.12226233631372452,
      "learning_rate": 8.07830737662829e-05,
      "loss": 10.8265,
      "step": 36
    },
    {
      "epoch": 0.003945167503015521,
      "grad_norm": 0.13375620543956757,
      "learning_rate": 7.938926261462366e-05,
      "loss": 10.8199,
      "step": 37
    },
    {
      "epoch": 0.00405179365174567,
      "grad_norm": 0.1250225007534027,
      "learning_rate": 7.795964517353735e-05,
      "loss": 10.8218,
      "step": 38
    },
    {
      "epoch": 0.004158419800475819,
      "grad_norm": 0.12829391658306122,
      "learning_rate": 7.649596321166024e-05,
      "loss": 10.8198,
      "step": 39
    },
    {
      "epoch": 0.0042650459492059685,
      "grad_norm": 0.14134521782398224,
      "learning_rate": 7.500000000000001e-05,
      "loss": 10.8201,
      "step": 40
    },
    {
      "epoch": 0.004371672097936118,
      "grad_norm": 0.13157622516155243,
      "learning_rate": 7.347357813929454e-05,
      "loss": 10.8164,
      "step": 41
    },
    {
      "epoch": 0.004478298246666267,
      "grad_norm": 0.13519853353500366,
      "learning_rate": 7.191855733945387e-05,
      "loss": 10.8196,
      "step": 42
    },
    {
      "epoch": 0.004584924395396416,
      "grad_norm": 0.14817938208580017,
      "learning_rate": 7.033683215379002e-05,
      "loss": 10.8187,
      "step": 43
    },
    {
      "epoch": 0.004691550544126565,
      "grad_norm": 0.15691693127155304,
      "learning_rate": 6.873032967079561e-05,
      "loss": 10.8158,
      "step": 44
    },
    {
      "epoch": 0.004798176692856714,
      "grad_norm": 0.1444297581911087,
      "learning_rate": 6.710100716628344e-05,
      "loss": 10.8157,
      "step": 45
    },
    {
      "epoch": 0.004904802841586863,
      "grad_norm": 0.1577857881784439,
      "learning_rate": 6.545084971874738e-05,
      "loss": 10.8182,
      "step": 46
    },
    {
      "epoch": 0.0050114289903170125,
      "grad_norm": 0.15604552626609802,
      "learning_rate": 6.378186779084995e-05,
      "loss": 10.8133,
      "step": 47
    },
    {
      "epoch": 0.0051180551390471625,
      "grad_norm": 0.15347470343112946,
      "learning_rate": 6.209609477998338e-05,
      "loss": 10.8167,
      "step": 48
    },
    {
      "epoch": 0.005224681287777312,
      "grad_norm": 0.13201665878295898,
      "learning_rate": 6.0395584540887963e-05,
      "loss": 10.8135,
      "step": 49
    },
    {
      "epoch": 0.005331307436507461,
      "grad_norm": 0.14732328057289124,
      "learning_rate": 5.868240888334653e-05,
      "loss": 10.8147,
      "step": 50
    },
    {
      "epoch": 0.005331307436507461,
      "eval_loss": 10.814597129821777,
      "eval_runtime": 27.1128,
      "eval_samples_per_second": 582.603,
      "eval_steps_per_second": 291.302,
      "step": 50
    },
    {
      "epoch": 0.00543793358523761,
      "grad_norm": 0.14409305155277252,
      "learning_rate": 5.695865504800327e-05,
      "loss": 10.8113,
      "step": 51
    },
    {
      "epoch": 0.005544559733967759,
      "grad_norm": 0.1595422625541687,
      "learning_rate": 5.522642316338268e-05,
      "loss": 10.8159,
      "step": 52
    },
    {
      "epoch": 0.005651185882697908,
      "grad_norm": 0.16688624024391174,
      "learning_rate": 5.348782368720626e-05,
      "loss": 10.8131,
      "step": 53
    },
    {
      "epoch": 0.005757812031428057,
      "grad_norm": 0.1620035171508789,
      "learning_rate": 5.174497483512506e-05,
      "loss": 10.8132,
      "step": 54
    },
    {
      "epoch": 0.0058644381801582065,
      "grad_norm": 0.16980452835559845,
      "learning_rate": 5e-05,
      "loss": 10.8129,
      "step": 55
    },
    {
      "epoch": 0.005971064328888356,
      "grad_norm": 0.1535421460866928,
      "learning_rate": 4.825502516487497e-05,
      "loss": 10.815,
      "step": 56
    },
    {
      "epoch": 0.006077690477618505,
      "grad_norm": 0.15803158283233643,
      "learning_rate": 4.6512176312793736e-05,
      "loss": 10.8153,
      "step": 57
    },
    {
      "epoch": 0.006184316626348654,
      "grad_norm": 0.15988963842391968,
      "learning_rate": 4.477357683661734e-05,
      "loss": 10.8119,
      "step": 58
    },
    {
      "epoch": 0.006290942775078803,
      "grad_norm": 0.17502981424331665,
      "learning_rate": 4.3041344951996746e-05,
      "loss": 10.8123,
      "step": 59
    },
    {
      "epoch": 0.006397568923808952,
      "grad_norm": 0.1722036898136139,
      "learning_rate": 4.131759111665349e-05,
      "loss": 10.8102,
      "step": 60
    },
    {
      "epoch": 0.006504195072539101,
      "grad_norm": 0.16091673076152802,
      "learning_rate": 3.960441545911204e-05,
      "loss": 10.8059,
      "step": 61
    },
    {
      "epoch": 0.006610821221269251,
      "grad_norm": 0.17409998178482056,
      "learning_rate": 3.790390522001662e-05,
      "loss": 10.8091,
      "step": 62
    },
    {
      "epoch": 0.0067174473699994006,
      "grad_norm": 0.20849758386611938,
      "learning_rate": 3.6218132209150045e-05,
      "loss": 10.799,
      "step": 63
    },
    {
      "epoch": 0.00682407351872955,
      "grad_norm": 0.16407425701618195,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 10.8124,
      "step": 64
    },
    {
      "epoch": 0.006930699667459699,
      "grad_norm": 0.18425709009170532,
      "learning_rate": 3.289899283371657e-05,
      "loss": 10.8096,
      "step": 65
    },
    {
      "epoch": 0.007037325816189848,
      "grad_norm": 0.17111074924468994,
      "learning_rate": 3.12696703292044e-05,
      "loss": 10.8096,
      "step": 66
    },
    {
      "epoch": 0.007143951964919997,
      "grad_norm": 0.17965374886989594,
      "learning_rate": 2.9663167846209998e-05,
      "loss": 10.8084,
      "step": 67
    },
    {
      "epoch": 0.007250578113650146,
      "grad_norm": 0.19009314477443695,
      "learning_rate": 2.8081442660546125e-05,
      "loss": 10.807,
      "step": 68
    },
    {
      "epoch": 0.0073572042623802954,
      "grad_norm": 0.18505646288394928,
      "learning_rate": 2.6526421860705473e-05,
      "loss": 10.8075,
      "step": 69
    },
    {
      "epoch": 0.007463830411110445,
      "grad_norm": 0.17133325338363647,
      "learning_rate": 2.500000000000001e-05,
      "loss": 10.8074,
      "step": 70
    },
    {
      "epoch": 0.007570456559840594,
      "grad_norm": 0.16672495007514954,
      "learning_rate": 2.350403678833976e-05,
      "loss": 10.8064,
      "step": 71
    },
    {
      "epoch": 0.007677082708570743,
      "grad_norm": 0.18847358226776123,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 10.8077,
      "step": 72
    },
    {
      "epoch": 0.007783708857300892,
      "grad_norm": 0.17686216533184052,
      "learning_rate": 2.061073738537635e-05,
      "loss": 10.808,
      "step": 73
    },
    {
      "epoch": 0.007890335006031042,
      "grad_norm": 0.1777198761701584,
      "learning_rate": 1.9216926233717085e-05,
      "loss": 10.8043,
      "step": 74
    },
    {
      "epoch": 0.00799696115476119,
      "grad_norm": 0.17206279933452606,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 10.8071,
      "step": 75
    },
    {
      "epoch": 0.00799696115476119,
      "eval_loss": 10.805438995361328,
      "eval_runtime": 50.9217,
      "eval_samples_per_second": 310.202,
      "eval_steps_per_second": 155.101,
      "step": 75
    },
    {
      "epoch": 0.00810358730349134,
      "grad_norm": 0.19393402338027954,
      "learning_rate": 1.6543469682057106e-05,
      "loss": 10.8052,
      "step": 76
    },
    {
      "epoch": 0.008210213452221489,
      "grad_norm": 0.17306220531463623,
      "learning_rate": 1.526708147705013e-05,
      "loss": 10.8092,
      "step": 77
    },
    {
      "epoch": 0.008316839600951639,
      "grad_norm": 0.18071946501731873,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 10.8096,
      "step": 78
    },
    {
      "epoch": 0.008423465749681787,
      "grad_norm": 0.18262875080108643,
      "learning_rate": 1.2842758726130283e-05,
      "loss": 10.8112,
      "step": 79
    },
    {
      "epoch": 0.008530091898411937,
      "grad_norm": 0.18377819657325745,
      "learning_rate": 1.1697777844051105e-05,
      "loss": 10.8096,
      "step": 80
    },
    {
      "epoch": 0.008636718047142085,
      "grad_norm": 0.1848413050174713,
      "learning_rate": 1.0599462319663905e-05,
      "loss": 10.8044,
      "step": 81
    },
    {
      "epoch": 0.008743344195872235,
      "grad_norm": 0.1873786747455597,
      "learning_rate": 9.549150281252633e-06,
      "loss": 10.8058,
      "step": 82
    },
    {
      "epoch": 0.008849970344602385,
      "grad_norm": 0.20228584110736847,
      "learning_rate": 8.548121372247918e-06,
      "loss": 10.8059,
      "step": 83
    },
    {
      "epoch": 0.008956596493332534,
      "grad_norm": 0.19265606999397278,
      "learning_rate": 7.597595192178702e-06,
      "loss": 10.8088,
      "step": 84
    },
    {
      "epoch": 0.009063222642062684,
      "grad_norm": 0.17763932049274445,
      "learning_rate": 6.698729810778065e-06,
      "loss": 10.7996,
      "step": 85
    },
    {
      "epoch": 0.009169848790792832,
      "grad_norm": 0.17844876646995544,
      "learning_rate": 5.852620357053651e-06,
      "loss": 10.802,
      "step": 86
    },
    {
      "epoch": 0.009276474939522982,
      "grad_norm": 0.17362311482429504,
      "learning_rate": 5.060297685041659e-06,
      "loss": 10.8022,
      "step": 87
    },
    {
      "epoch": 0.00938310108825313,
      "grad_norm": 0.18764075636863708,
      "learning_rate": 4.322727117869951e-06,
      "loss": 10.8014,
      "step": 88
    },
    {
      "epoch": 0.00948972723698328,
      "grad_norm": 0.20152869820594788,
      "learning_rate": 3.6408072716606346e-06,
      "loss": 10.8044,
      "step": 89
    },
    {
      "epoch": 0.009596353385713428,
      "grad_norm": 0.19738389551639557,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 10.8073,
      "step": 90
    },
    {
      "epoch": 0.009702979534443578,
      "grad_norm": 0.19693444669246674,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 10.7984,
      "step": 91
    },
    {
      "epoch": 0.009809605683173727,
      "grad_norm": 0.20395702123641968,
      "learning_rate": 1.9369152030840556e-06,
      "loss": 10.801,
      "step": 92
    },
    {
      "epoch": 0.009916231831903877,
      "grad_norm": 0.1859755963087082,
      "learning_rate": 1.4852136862001764e-06,
      "loss": 10.8075,
      "step": 93
    },
    {
      "epoch": 0.010022857980634025,
      "grad_norm": 0.18670888245105743,
      "learning_rate": 1.0926199633097157e-06,
      "loss": 10.8028,
      "step": 94
    },
    {
      "epoch": 0.010129484129364175,
      "grad_norm": 0.1718273162841797,
      "learning_rate": 7.596123493895991e-07,
      "loss": 10.806,
      "step": 95
    },
    {
      "epoch": 0.010236110278094325,
      "grad_norm": 0.2033439427614212,
      "learning_rate": 4.865965629214819e-07,
      "loss": 10.8003,
      "step": 96
    },
    {
      "epoch": 0.010342736426824473,
      "grad_norm": 0.16593502461910248,
      "learning_rate": 2.7390523158633554e-07,
      "loss": 10.8075,
      "step": 97
    },
    {
      "epoch": 0.010449362575554623,
      "grad_norm": 0.1901598572731018,
      "learning_rate": 1.2179748700879012e-07,
      "loss": 10.8042,
      "step": 98
    },
    {
      "epoch": 0.010555988724284772,
      "grad_norm": 0.1914721131324768,
      "learning_rate": 3.04586490452119e-08,
      "loss": 10.8042,
      "step": 99
    },
    {
      "epoch": 0.010662614873014922,
      "grad_norm": 0.19154879450798035,
      "learning_rate": 0.0,
      "loss": 10.8053,
      "step": 100
    },
    {
      "epoch": 0.010662614873014922,
      "eval_loss": 10.803692817687988,
      "eval_runtime": 73.6667,
      "eval_samples_per_second": 214.425,
      "eval_steps_per_second": 107.213,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 44295971143680.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}