|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 58, |
|
"global_step": 116, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008620689655172414, |
|
"grad_norm": 0.7549098134040833, |
|
"learning_rate": 2e-05, |
|
"loss": 2.3785, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.008620689655172414, |
|
"eval_loss": 2.582935094833374, |
|
"eval_runtime": 7.6921, |
|
"eval_samples_per_second": 25.351, |
|
"eval_steps_per_second": 3.25, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.017241379310344827, |
|
"grad_norm": 0.720708966255188, |
|
"learning_rate": 4e-05, |
|
"loss": 2.2748, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02586206896551724, |
|
"grad_norm": 0.6924182176589966, |
|
"learning_rate": 6e-05, |
|
"loss": 2.4147, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.034482758620689655, |
|
"grad_norm": 0.8156349658966064, |
|
"learning_rate": 8e-05, |
|
"loss": 2.3778, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.04310344827586207, |
|
"grad_norm": 0.7163665294647217, |
|
"learning_rate": 0.0001, |
|
"loss": 2.4918, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.05172413793103448, |
|
"grad_norm": 0.9344202280044556, |
|
"learning_rate": 0.00012, |
|
"loss": 2.5429, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0603448275862069, |
|
"grad_norm": 0.8686364889144897, |
|
"learning_rate": 0.00014, |
|
"loss": 2.0592, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.06896551724137931, |
|
"grad_norm": 0.9987367987632751, |
|
"learning_rate": 0.00016, |
|
"loss": 1.8856, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.07758620689655173, |
|
"grad_norm": 1.5086486339569092, |
|
"learning_rate": 0.00018, |
|
"loss": 1.8125, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.08620689655172414, |
|
"grad_norm": 1.8301935195922852, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1144, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09482758620689655, |
|
"grad_norm": 2.5455808639526367, |
|
"learning_rate": 0.00019995608365087946, |
|
"loss": 1.4668, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.10344827586206896, |
|
"grad_norm": 3.3915581703186035, |
|
"learning_rate": 0.00019982437317643217, |
|
"loss": 1.3056, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.11206896551724138, |
|
"grad_norm": 3.114424705505371, |
|
"learning_rate": 0.0001996049842615217, |
|
"loss": 1.0526, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.1206896551724138, |
|
"grad_norm": 2.363276243209839, |
|
"learning_rate": 0.00019929810960135172, |
|
"loss": 0.729, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.12931034482758622, |
|
"grad_norm": 2.7170567512512207, |
|
"learning_rate": 0.0001989040187322164, |
|
"loss": 0.5538, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.13793103448275862, |
|
"grad_norm": 2.299168109893799, |
|
"learning_rate": 0.00019842305779475968, |
|
"loss": 0.3469, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.14655172413793102, |
|
"grad_norm": 1.1169408559799194, |
|
"learning_rate": 0.0001978556492299504, |
|
"loss": 0.1737, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.15517241379310345, |
|
"grad_norm": 8.511645317077637, |
|
"learning_rate": 0.0001972022914080411, |
|
"loss": 0.2247, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.16379310344827586, |
|
"grad_norm": 1.6103713512420654, |
|
"learning_rate": 0.00019646355819083589, |
|
"loss": 0.1969, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.1724137931034483, |
|
"grad_norm": 1.5031853914260864, |
|
"learning_rate": 0.00019564009842765225, |
|
"loss": 0.1419, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1810344827586207, |
|
"grad_norm": 1.3411375284194946, |
|
"learning_rate": 0.00019473263538541914, |
|
"loss": 0.111, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.1896551724137931, |
|
"grad_norm": 0.4128853678703308, |
|
"learning_rate": 0.0001937419661134121, |
|
"loss": 0.0782, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.19827586206896552, |
|
"grad_norm": 0.40607255697250366, |
|
"learning_rate": 0.00019266896074318334, |
|
"loss": 0.0601, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.20689655172413793, |
|
"grad_norm": 1.0385711193084717, |
|
"learning_rate": 0.00019151456172430183, |
|
"loss": 0.0881, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.21551724137931033, |
|
"grad_norm": 0.5099380016326904, |
|
"learning_rate": 0.00019027978299657436, |
|
"loss": 0.1187, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.22413793103448276, |
|
"grad_norm": 0.2118569314479828, |
|
"learning_rate": 0.00018896570909947475, |
|
"loss": 0.1058, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.23275862068965517, |
|
"grad_norm": 0.37697386741638184, |
|
"learning_rate": 0.0001875734942195637, |
|
"loss": 0.0779, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.2413793103448276, |
|
"grad_norm": 0.5538561940193176, |
|
"learning_rate": 0.00018610436117673555, |
|
"loss": 0.1473, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.13844181597232819, |
|
"learning_rate": 0.0001845596003501826, |
|
"loss": 0.0387, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.25862068965517243, |
|
"grad_norm": 0.28886792063713074, |
|
"learning_rate": 0.0001829405685450202, |
|
"loss": 0.0484, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2672413793103448, |
|
"grad_norm": 0.40534093976020813, |
|
"learning_rate": 0.00018124868780056814, |
|
"loss": 0.0506, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.27586206896551724, |
|
"grad_norm": 0.28827980160713196, |
|
"learning_rate": 0.00017948544414133534, |
|
"loss": 0.0386, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.28448275862068967, |
|
"grad_norm": 0.19116564095020294, |
|
"learning_rate": 0.00017765238627180424, |
|
"loss": 0.0344, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.29310344827586204, |
|
"grad_norm": 0.2074674665927887, |
|
"learning_rate": 0.00017575112421616202, |
|
"loss": 0.0587, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.3017241379310345, |
|
"grad_norm": 0.18823839724063873, |
|
"learning_rate": 0.00017378332790417273, |
|
"loss": 0.0449, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.3103448275862069, |
|
"grad_norm": 0.2116956263780594, |
|
"learning_rate": 0.00017175072570443312, |
|
"loss": 0.0547, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.31896551724137934, |
|
"grad_norm": 0.2971654534339905, |
|
"learning_rate": 0.00016965510290629972, |
|
"loss": 0.0417, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.3275862068965517, |
|
"grad_norm": 0.11563872545957565, |
|
"learning_rate": 0.00016749830015182107, |
|
"loss": 0.0196, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.33620689655172414, |
|
"grad_norm": 0.09132111817598343, |
|
"learning_rate": 0.00016528221181905217, |
|
"loss": 0.015, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.3448275862068966, |
|
"grad_norm": 0.11758074164390564, |
|
"learning_rate": 0.00016300878435817113, |
|
"loss": 0.0264, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.35344827586206895, |
|
"grad_norm": 0.26003605127334595, |
|
"learning_rate": 0.00016068001458185936, |
|
"loss": 0.0701, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.3620689655172414, |
|
"grad_norm": 0.16987216472625732, |
|
"learning_rate": 0.0001582979479114472, |
|
"loss": 0.0232, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.3706896551724138, |
|
"grad_norm": 0.16075719892978668, |
|
"learning_rate": 0.00015586467658036524, |
|
"loss": 0.0257, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.3793103448275862, |
|
"grad_norm": 0.08301292359828949, |
|
"learning_rate": 0.0001533823377964791, |
|
"loss": 0.0059, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.3879310344827586, |
|
"grad_norm": 0.13616588711738586, |
|
"learning_rate": 0.00015085311186492206, |
|
"loss": 0.0207, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.39655172413793105, |
|
"grad_norm": 0.7669395804405212, |
|
"learning_rate": 0.00014827922027307451, |
|
"loss": 0.0992, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.4051724137931034, |
|
"grad_norm": 0.2077718824148178, |
|
"learning_rate": 0.0001456629237393713, |
|
"loss": 0.0193, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.41379310344827586, |
|
"grad_norm": 0.08794950693845749, |
|
"learning_rate": 0.00014300652022765207, |
|
"loss": 0.0107, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.4224137931034483, |
|
"grad_norm": 0.1444607675075531, |
|
"learning_rate": 0.00014031234292879725, |
|
"loss": 0.0269, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.43103448275862066, |
|
"grad_norm": 0.22816063463687897, |
|
"learning_rate": 0.00013758275821142382, |
|
"loss": 0.0201, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4396551724137931, |
|
"grad_norm": 0.24295277893543243, |
|
"learning_rate": 0.0001348201635434399, |
|
"loss": 0.0304, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.4482758620689655, |
|
"grad_norm": 0.22172880172729492, |
|
"learning_rate": 0.00013202698538628376, |
|
"loss": 0.0128, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.45689655172413796, |
|
"grad_norm": 0.12180917710065842, |
|
"learning_rate": 0.00012920567706369758, |
|
"loss": 0.016, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.46551724137931033, |
|
"grad_norm": 0.278648316860199, |
|
"learning_rate": 0.00012635871660690676, |
|
"loss": 0.0337, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.47413793103448276, |
|
"grad_norm": 0.0709623172879219, |
|
"learning_rate": 0.00012348860457809838, |
|
"loss": 0.0126, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.4827586206896552, |
|
"grad_norm": 0.1974678933620453, |
|
"learning_rate": 0.00012059786187410984, |
|
"loss": 0.019, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.49137931034482757, |
|
"grad_norm": 0.08393372595310211, |
|
"learning_rate": 0.0001176890275122573, |
|
"loss": 0.0083, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.12021597474813461, |
|
"learning_rate": 0.00011476465640024814, |
|
"loss": 0.0084, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 0.02141181007027626, |
|
"eval_runtime": 7.7496, |
|
"eval_samples_per_second": 25.163, |
|
"eval_steps_per_second": 3.226, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.5086206896551724, |
|
"grad_norm": 0.12311367690563202, |
|
"learning_rate": 0.00011182731709213659, |
|
"loss": 0.0259, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.5172413793103449, |
|
"grad_norm": 0.03950917348265648, |
|
"learning_rate": 0.00010887958953229349, |
|
"loss": 0.0033, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5258620689655172, |
|
"grad_norm": 0.12783688306808472, |
|
"learning_rate": 0.00010592406278937144, |
|
"loss": 0.0094, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.5344827586206896, |
|
"grad_norm": 0.23351043462753296, |
|
"learning_rate": 0.00010296333278225599, |
|
"loss": 0.0405, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.5431034482758621, |
|
"grad_norm": 0.07918150722980499, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0189, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.5517241379310345, |
|
"grad_norm": 0.129180446267128, |
|
"learning_rate": 9.703666721774402e-05, |
|
"loss": 0.0118, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.5603448275862069, |
|
"grad_norm": 0.20834843814373016, |
|
"learning_rate": 9.407593721062859e-05, |
|
"loss": 0.0106, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.5689655172413793, |
|
"grad_norm": 0.03432063013315201, |
|
"learning_rate": 9.112041046770653e-05, |
|
"loss": 0.0031, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.5775862068965517, |
|
"grad_norm": 0.3878253698348999, |
|
"learning_rate": 8.817268290786343e-05, |
|
"loss": 0.0199, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.5862068965517241, |
|
"grad_norm": 0.19941666722297668, |
|
"learning_rate": 8.523534359975189e-05, |
|
"loss": 0.0282, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.5948275862068966, |
|
"grad_norm": 0.28532180190086365, |
|
"learning_rate": 8.231097248774274e-05, |
|
"loss": 0.0231, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.603448275862069, |
|
"grad_norm": 0.19040793180465698, |
|
"learning_rate": 7.940213812589018e-05, |
|
"loss": 0.0239, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6120689655172413, |
|
"grad_norm": 0.15338897705078125, |
|
"learning_rate": 7.651139542190164e-05, |
|
"loss": 0.0096, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.6206896551724138, |
|
"grad_norm": 0.21563775837421417, |
|
"learning_rate": 7.364128339309326e-05, |
|
"loss": 0.0182, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.6293103448275862, |
|
"grad_norm": 0.11136949807405472, |
|
"learning_rate": 7.079432293630244e-05, |
|
"loss": 0.0198, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.6379310344827587, |
|
"grad_norm": 0.06761553883552551, |
|
"learning_rate": 6.797301461371625e-05, |
|
"loss": 0.0061, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.646551724137931, |
|
"grad_norm": 0.17843712866306305, |
|
"learning_rate": 6.517983645656014e-05, |
|
"loss": 0.0105, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.6551724137931034, |
|
"grad_norm": 0.12946659326553345, |
|
"learning_rate": 6.24172417885762e-05, |
|
"loss": 0.0148, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.6637931034482759, |
|
"grad_norm": 0.04445435479283333, |
|
"learning_rate": 5.96876570712028e-05, |
|
"loss": 0.0046, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.6724137931034483, |
|
"grad_norm": 0.12194405496120453, |
|
"learning_rate": 5.699347977234799e-05, |
|
"loss": 0.021, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.6810344827586207, |
|
"grad_norm": 0.1209009662270546, |
|
"learning_rate": 5.43370762606287e-05, |
|
"loss": 0.0038, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.6896551724137931, |
|
"grad_norm": 0.18816731870174408, |
|
"learning_rate": 5.172077972692553e-05, |
|
"loss": 0.0175, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6982758620689655, |
|
"grad_norm": 0.0713019147515297, |
|
"learning_rate": 4.914688813507797e-05, |
|
"loss": 0.0033, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.7068965517241379, |
|
"grad_norm": 0.06428212672472, |
|
"learning_rate": 4.661766220352097e-05, |
|
"loss": 0.0111, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.7155172413793104, |
|
"grad_norm": 0.06654125452041626, |
|
"learning_rate": 4.4135323419634766e-05, |
|
"loss": 0.0088, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.7241379310344828, |
|
"grad_norm": 0.08924495428800583, |
|
"learning_rate": 4.170205208855281e-05, |
|
"loss": 0.0104, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.7327586206896551, |
|
"grad_norm": 0.1306809037923813, |
|
"learning_rate": 3.931998541814069e-05, |
|
"loss": 0.014, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.7413793103448276, |
|
"grad_norm": 0.6109310984611511, |
|
"learning_rate": 3.69912156418289e-05, |
|
"loss": 0.0169, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.1788921356201172, |
|
"learning_rate": 3.471778818094785e-05, |
|
"loss": 0.0139, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.7586206896551724, |
|
"grad_norm": 0.07347411662340164, |
|
"learning_rate": 3.250169984817897e-05, |
|
"loss": 0.0113, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.7672413793103449, |
|
"grad_norm": 0.2131931632757187, |
|
"learning_rate": 3.034489709370033e-05, |
|
"loss": 0.0268, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.7758620689655172, |
|
"grad_norm": 0.06642799824476242, |
|
"learning_rate": 2.8249274295566864e-05, |
|
"loss": 0.0124, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.7844827586206896, |
|
"grad_norm": 0.26418986916542053, |
|
"learning_rate": 2.6216672095827266e-05, |
|
"loss": 0.013, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.7931034482758621, |
|
"grad_norm": 0.016451340168714523, |
|
"learning_rate": 2.4248875783837987e-05, |
|
"loss": 0.0018, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.8017241379310345, |
|
"grad_norm": 0.10589156299829483, |
|
"learning_rate": 2.234761372819577e-05, |
|
"loss": 0.0198, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.8103448275862069, |
|
"grad_norm": 0.09367933124303818, |
|
"learning_rate": 2.0514555858664663e-05, |
|
"loss": 0.0067, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.8189655172413793, |
|
"grad_norm": 0.04479554295539856, |
|
"learning_rate": 1.875131219943187e-05, |
|
"loss": 0.0029, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.8275862068965517, |
|
"grad_norm": 0.06302911043167114, |
|
"learning_rate": 1.7059431454979824e-05, |
|
"loss": 0.0052, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.8362068965517241, |
|
"grad_norm": 0.05771315097808838, |
|
"learning_rate": 1.5440399649817385e-05, |
|
"loss": 0.0082, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.8448275862068966, |
|
"grad_norm": 0.2593729794025421, |
|
"learning_rate": 1.3895638823264446e-05, |
|
"loss": 0.0174, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.853448275862069, |
|
"grad_norm": 0.0901479423046112, |
|
"learning_rate": 1.2426505780436326e-05, |
|
"loss": 0.0059, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.8620689655172413, |
|
"grad_norm": 0.3058486580848694, |
|
"learning_rate": 1.103429090052528e-05, |
|
"loss": 0.0136, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8706896551724138, |
|
"grad_norm": 0.13096828758716583, |
|
"learning_rate": 9.720217003425647e-06, |
|
"loss": 0.0102, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.8793103448275862, |
|
"grad_norm": 0.04190494120121002, |
|
"learning_rate": 8.485438275698154e-06, |
|
"loss": 0.0055, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.8879310344827587, |
|
"grad_norm": 0.247361958026886, |
|
"learning_rate": 7.331039256816663e-06, |
|
"loss": 0.0212, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.896551724137931, |
|
"grad_norm": 0.07285796105861664, |
|
"learning_rate": 6.258033886587911e-06, |
|
"loss": 0.0116, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.9051724137931034, |
|
"grad_norm": 0.08532743901014328, |
|
"learning_rate": 5.267364614580861e-06, |
|
"loss": 0.0147, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.9137931034482759, |
|
"grad_norm": 0.042795587331056595, |
|
"learning_rate": 4.359901572347758e-06, |
|
"loss": 0.0031, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.9224137931034483, |
|
"grad_norm": 0.05108323693275452, |
|
"learning_rate": 3.5364418091641373e-06, |
|
"loss": 0.0042, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.9310344827586207, |
|
"grad_norm": 0.14668087661266327, |
|
"learning_rate": 2.7977085919589254e-06, |
|
"loss": 0.0108, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.9396551724137931, |
|
"grad_norm": 0.0402313657104969, |
|
"learning_rate": 2.144350770049597e-06, |
|
"loss": 0.0029, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.9482758620689655, |
|
"grad_norm": 0.19336402416229248, |
|
"learning_rate": 1.576942205240317e-06, |
|
"loss": 0.0202, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.9568965517241379, |
|
"grad_norm": 0.5075563192367554, |
|
"learning_rate": 1.0959812677835968e-06, |
|
"loss": 0.0196, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.9655172413793104, |
|
"grad_norm": 0.13074348866939545, |
|
"learning_rate": 7.018903986483083e-07, |
|
"loss": 0.0208, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.9741379310344828, |
|
"grad_norm": 0.09440784156322479, |
|
"learning_rate": 3.950157384783104e-07, |
|
"loss": 0.0137, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.9827586206896551, |
|
"grad_norm": 0.11262812465429306, |
|
"learning_rate": 1.7562682356786487e-07, |
|
"loss": 0.0164, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.9913793103448276, |
|
"grad_norm": 0.09802548587322235, |
|
"learning_rate": 4.391634912056519e-08, |
|
"loss": 0.0046, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.1202988475561142, |
|
"learning_rate": 0.0, |
|
"loss": 0.0165, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.013442743569612503, |
|
"eval_runtime": 7.6757, |
|
"eval_samples_per_second": 25.405, |
|
"eval_steps_per_second": 3.257, |
|
"step": 116 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 116, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8011375527854080.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|