{
  "best_metric": 1.1042157411575317,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 1.0023640661938533,
  "eval_steps": 50,
  "global_step": 212,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004728132387706856,
      "grad_norm": 0.9486807584762573,
      "learning_rate": 1e-05,
      "loss": 1.8408,
      "step": 1
    },
    {
      "epoch": 0.004728132387706856,
      "eval_loss": 1.963819146156311,
      "eval_runtime": 38.0049,
      "eval_samples_per_second": 9.394,
      "eval_steps_per_second": 2.368,
      "step": 1
    },
    {
      "epoch": 0.009456264775413711,
      "grad_norm": 0.9640880227088928,
      "learning_rate": 2e-05,
      "loss": 1.8428,
      "step": 2
    },
    {
      "epoch": 0.014184397163120567,
      "grad_norm": 0.9609369039535522,
      "learning_rate": 3e-05,
      "loss": 1.9502,
      "step": 3
    },
    {
      "epoch": 0.018912529550827423,
      "grad_norm": 0.9109072685241699,
      "learning_rate": 4e-05,
      "loss": 1.9277,
      "step": 4
    },
    {
      "epoch": 0.02364066193853428,
      "grad_norm": 0.9222002625465393,
      "learning_rate": 5e-05,
      "loss": 1.8179,
      "step": 5
    },
    {
      "epoch": 0.028368794326241134,
      "grad_norm": 0.7830185890197754,
      "learning_rate": 6e-05,
      "loss": 1.7072,
      "step": 6
    },
    {
      "epoch": 0.03309692671394799,
      "grad_norm": 0.6503215432167053,
      "learning_rate": 7e-05,
      "loss": 1.6178,
      "step": 7
    },
    {
      "epoch": 0.037825059101654845,
      "grad_norm": 0.5257905125617981,
      "learning_rate": 8e-05,
      "loss": 1.5769,
      "step": 8
    },
    {
      "epoch": 0.0425531914893617,
      "grad_norm": 0.5728737711906433,
      "learning_rate": 9e-05,
      "loss": 1.5687,
      "step": 9
    },
    {
      "epoch": 0.04728132387706856,
      "grad_norm": 0.7015601396560669,
      "learning_rate": 0.0001,
      "loss": 1.6133,
      "step": 10
    },
    {
      "epoch": 0.05200945626477541,
      "grad_norm": 0.797338604927063,
      "learning_rate": 9.999395316300748e-05,
      "loss": 1.5002,
      "step": 11
    },
    {
      "epoch": 0.05673758865248227,
      "grad_norm": 0.6119827628135681,
      "learning_rate": 9.997581411459941e-05,
      "loss": 1.5473,
      "step": 12
    },
    {
      "epoch": 0.061465721040189124,
      "grad_norm": 0.49313884973526,
      "learning_rate": 9.994558724213054e-05,
      "loss": 1.4589,
      "step": 13
    },
    {
      "epoch": 0.06619385342789598,
      "grad_norm": 0.4498696029186249,
      "learning_rate": 9.990327985667972e-05,
      "loss": 1.4562,
      "step": 14
    },
    {
      "epoch": 0.07092198581560284,
      "grad_norm": 0.38253799080848694,
      "learning_rate": 9.984890219128146e-05,
      "loss": 1.3475,
      "step": 15
    },
    {
      "epoch": 0.07565011820330969,
      "grad_norm": 0.40526261925697327,
      "learning_rate": 9.978246739845094e-05,
      "loss": 1.3564,
      "step": 16
    },
    {
      "epoch": 0.08037825059101655,
      "grad_norm": 0.4410207271575928,
      "learning_rate": 9.970399154700263e-05,
      "loss": 1.3799,
      "step": 17
    },
    {
      "epoch": 0.0851063829787234,
      "grad_norm": 0.4076521098613739,
      "learning_rate": 9.961349361816384e-05,
      "loss": 1.4092,
      "step": 18
    },
    {
      "epoch": 0.08983451536643026,
      "grad_norm": 0.4196551442146301,
      "learning_rate": 9.951099550098349e-05,
      "loss": 1.3264,
      "step": 19
    },
    {
      "epoch": 0.09456264775413711,
      "grad_norm": 0.3697325885295868,
      "learning_rate": 9.939652198703784e-05,
      "loss": 1.3886,
      "step": 20
    },
    {
      "epoch": 0.09929078014184398,
      "grad_norm": 0.346131294965744,
      "learning_rate": 9.927010076443407e-05,
      "loss": 1.2821,
      "step": 21
    },
    {
      "epoch": 0.10401891252955082,
      "grad_norm": 0.3620453476905823,
      "learning_rate": 9.913176241111319e-05,
      "loss": 1.3636,
      "step": 22
    },
    {
      "epoch": 0.10874704491725769,
      "grad_norm": 0.37463706731796265,
      "learning_rate": 9.898154038745408e-05,
      "loss": 1.275,
      "step": 23
    },
    {
      "epoch": 0.11347517730496454,
      "grad_norm": 0.3912721574306488,
      "learning_rate": 9.881947102818036e-05,
      "loss": 1.2499,
      "step": 24
    },
    {
      "epoch": 0.1182033096926714,
      "grad_norm": 0.3786635100841522,
      "learning_rate": 9.864559353357187e-05,
      "loss": 1.3173,
      "step": 25
    },
    {
      "epoch": 0.12293144208037825,
      "grad_norm": 0.3728365898132324,
      "learning_rate": 9.845994995998332e-05,
      "loss": 1.2287,
      "step": 26
    },
    {
      "epoch": 0.1276595744680851,
      "grad_norm": 0.40303531289100647,
      "learning_rate": 9.826258520967178e-05,
      "loss": 1.2327,
      "step": 27
    },
    {
      "epoch": 0.13238770685579196,
      "grad_norm": 0.3770873546600342,
      "learning_rate": 9.805354701993623e-05,
      "loss": 1.2309,
      "step": 28
    },
    {
      "epoch": 0.13711583924349882,
      "grad_norm": 0.41253548860549927,
      "learning_rate": 9.783288595157098e-05,
      "loss": 1.3503,
      "step": 29
    },
    {
      "epoch": 0.14184397163120568,
      "grad_norm": 0.3858996033668518,
      "learning_rate": 9.760065537663649e-05,
      "loss": 1.2443,
      "step": 30
    },
    {
      "epoch": 0.14657210401891252,
      "grad_norm": 0.40391531586647034,
      "learning_rate": 9.735691146555002e-05,
      "loss": 1.1587,
      "step": 31
    },
    {
      "epoch": 0.15130023640661938,
      "grad_norm": 0.40914273262023926,
      "learning_rate": 9.710171317349945e-05,
      "loss": 1.2247,
      "step": 32
    },
    {
      "epoch": 0.15602836879432624,
      "grad_norm": 0.41252586245536804,
      "learning_rate": 9.683512222618377e-05,
      "loss": 1.1826,
      "step": 33
    },
    {
      "epoch": 0.1607565011820331,
      "grad_norm": 0.42859312891960144,
      "learning_rate": 9.655720310488299e-05,
      "loss": 1.1764,
      "step": 34
    },
    {
      "epoch": 0.16548463356973994,
      "grad_norm": 0.4406868815422058,
      "learning_rate": 9.626802303086208e-05,
      "loss": 1.1773,
      "step": 35
    },
    {
      "epoch": 0.1702127659574468,
      "grad_norm": 0.46166718006134033,
      "learning_rate": 9.596765194911181e-05,
      "loss": 1.3088,
      "step": 36
    },
    {
      "epoch": 0.17494089834515367,
      "grad_norm": 0.4437083303928375,
      "learning_rate": 9.565616251143094e-05,
      "loss": 1.2081,
      "step": 37
    },
    {
      "epoch": 0.17966903073286053,
      "grad_norm": 0.41885045170783997,
      "learning_rate": 9.533363005885363e-05,
      "loss": 1.2329,
      "step": 38
    },
    {
      "epoch": 0.18439716312056736,
      "grad_norm": 0.40488675236701965,
      "learning_rate": 9.500013260342651e-05,
      "loss": 1.1143,
      "step": 39
    },
    {
      "epoch": 0.18912529550827423,
      "grad_norm": 0.4128928780555725,
      "learning_rate": 9.465575080933958e-05,
      "loss": 1.1915,
      "step": 40
    },
    {
      "epoch": 0.1938534278959811,
      "grad_norm": 0.4129532277584076,
      "learning_rate": 9.430056797341574e-05,
      "loss": 1.1523,
      "step": 41
    },
    {
      "epoch": 0.19858156028368795,
      "grad_norm": 0.4176290035247803,
      "learning_rate": 9.393467000496344e-05,
      "loss": 1.1828,
      "step": 42
    },
    {
      "epoch": 0.2033096926713948,
      "grad_norm": 0.40814054012298584,
      "learning_rate": 9.355814540499752e-05,
      "loss": 1.1198,
      "step": 43
    },
    {
      "epoch": 0.20803782505910165,
      "grad_norm": 0.44160643219947815,
      "learning_rate": 9.317108524483318e-05,
      "loss": 1.1523,
      "step": 44
    },
    {
      "epoch": 0.2127659574468085,
      "grad_norm": 0.449492484331131,
      "learning_rate": 9.27735831440582e-05,
      "loss": 1.1841,
      "step": 45
    },
    {
      "epoch": 0.21749408983451538,
      "grad_norm": 0.4739810526371002,
      "learning_rate": 9.236573524788887e-05,
      "loss": 1.2485,
      "step": 46
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 0.4747321605682373,
      "learning_rate": 9.194764020391506e-05,
      "loss": 1.1819,
      "step": 47
    },
    {
      "epoch": 0.22695035460992907,
      "grad_norm": 0.5036101937294006,
      "learning_rate": 9.151939913823988e-05,
      "loss": 1.2246,
      "step": 48
    },
    {
      "epoch": 0.23167848699763594,
      "grad_norm": 0.4752824008464813,
      "learning_rate": 9.108111563102004e-05,
      "loss": 1.1525,
      "step": 49
    },
    {
      "epoch": 0.2364066193853428,
      "grad_norm": 0.6015028953552246,
      "learning_rate": 9.063289569141251e-05,
      "loss": 1.202,
      "step": 50
    },
    {
      "epoch": 0.2364066193853428,
      "eval_loss": 1.201499104499817,
      "eval_runtime": 38.7229,
      "eval_samples_per_second": 9.219,
      "eval_steps_per_second": 2.324,
      "step": 50
    },
    {
      "epoch": 0.24113475177304963,
      "grad_norm": 0.5079233646392822,
      "learning_rate": 9.017484773193378e-05,
      "loss": 1.2754,
      "step": 51
    },
    {
      "epoch": 0.2458628841607565,
      "grad_norm": 0.43763864040374756,
      "learning_rate": 8.970708254223768e-05,
      "loss": 1.266,
      "step": 52
    },
    {
      "epoch": 0.25059101654846333,
      "grad_norm": 0.42711228132247925,
      "learning_rate": 8.92297132623183e-05,
      "loss": 1.3319,
      "step": 53
    },
    {
      "epoch": 0.2553191489361702,
      "grad_norm": 0.3751334547996521,
      "learning_rate": 8.87428553551445e-05,
      "loss": 1.2406,
      "step": 54
    },
    {
      "epoch": 0.26004728132387706,
      "grad_norm": 0.36792027950286865,
      "learning_rate": 8.824662657873239e-05,
      "loss": 1.2687,
      "step": 55
    },
    {
      "epoch": 0.2647754137115839,
      "grad_norm": 0.36172714829444885,
      "learning_rate": 8.774114695766286e-05,
      "loss": 1.1624,
      "step": 56
    },
    {
      "epoch": 0.2695035460992908,
      "grad_norm": 0.3969741761684418,
      "learning_rate": 8.722653875405075e-05,
      "loss": 1.1844,
      "step": 57
    },
    {
      "epoch": 0.27423167848699764,
      "grad_norm": 0.3591637909412384,
      "learning_rate": 8.670292643797302e-05,
      "loss": 1.2324,
      "step": 58
    },
    {
      "epoch": 0.2789598108747045,
      "grad_norm": 0.3697453439235687,
      "learning_rate": 8.617043665736249e-05,
      "loss": 1.2002,
      "step": 59
    },
    {
      "epoch": 0.28368794326241137,
      "grad_norm": 0.36921319365501404,
      "learning_rate": 8.562919820737536e-05,
      "loss": 1.2023,
      "step": 60
    },
    {
      "epoch": 0.28841607565011823,
      "grad_norm": 0.37672919034957886,
      "learning_rate": 8.507934199923884e-05,
      "loss": 1.2226,
      "step": 61
    },
    {
      "epoch": 0.29314420803782504,
      "grad_norm": 0.38116613030433655,
      "learning_rate": 8.452100102858734e-05,
      "loss": 1.2582,
      "step": 62
    },
    {
      "epoch": 0.2978723404255319,
      "grad_norm": 0.3604193329811096,
      "learning_rate": 8.39543103432943e-05,
      "loss": 1.1556,
      "step": 63
    },
    {
      "epoch": 0.30260047281323876,
      "grad_norm": 0.3755967915058136,
      "learning_rate": 8.337940701080768e-05,
      "loss": 1.1337,
      "step": 64
    },
    {
      "epoch": 0.3073286052009456,
      "grad_norm": 0.3861168622970581,
      "learning_rate": 8.2796430084997e-05,
      "loss": 1.2263,
      "step": 65
    },
    {
      "epoch": 0.3120567375886525,
      "grad_norm": 0.3755252957344055,
      "learning_rate": 8.22055205725199e-05,
      "loss": 1.238,
      "step": 66
    },
    {
      "epoch": 0.31678486997635935,
      "grad_norm": 0.3644655644893646,
      "learning_rate": 8.160682139871633e-05,
      "loss": 1.1605,
      "step": 67
    },
    {
      "epoch": 0.3215130023640662,
      "grad_norm": 0.3366273045539856,
      "learning_rate": 8.100047737303877e-05,
      "loss": 1.1462,
      "step": 68
    },
    {
      "epoch": 0.3262411347517731,
      "grad_norm": 0.3894293010234833,
      "learning_rate": 8.03866351540266e-05,
      "loss": 1.2754,
      "step": 69
    },
    {
      "epoch": 0.3309692671394799,
      "grad_norm": 0.34858208894729614,
      "learning_rate": 7.97654432138333e-05,
      "loss": 1.1449,
      "step": 70
    },
    {
      "epoch": 0.33569739952718675,
      "grad_norm": 0.3382340669631958,
      "learning_rate": 7.913705180231505e-05,
      "loss": 1.2337,
      "step": 71
    },
    {
      "epoch": 0.3404255319148936,
      "grad_norm": 0.33755427598953247,
      "learning_rate": 7.850161291068913e-05,
      "loss": 1.1643,
      "step": 72
    },
    {
      "epoch": 0.34515366430260047,
      "grad_norm": 0.36655935645103455,
      "learning_rate": 7.785928023477142e-05,
      "loss": 1.1103,
      "step": 73
    },
    {
      "epoch": 0.34988179669030733,
      "grad_norm": 0.3621681034564972,
      "learning_rate": 7.721020913780137e-05,
      "loss": 1.1146,
      "step": 74
    },
    {
      "epoch": 0.3546099290780142,
      "grad_norm": 0.3632483184337616,
      "learning_rate": 7.655455661286376e-05,
      "loss": 1.162,
      "step": 75
    },
    {
      "epoch": 0.35933806146572106,
      "grad_norm": 0.39180269837379456,
      "learning_rate": 7.589248124491627e-05,
      "loss": 1.2159,
      "step": 76
    },
    {
      "epoch": 0.3640661938534279,
      "grad_norm": 0.3684546649456024,
      "learning_rate": 7.5224143172432e-05,
      "loss": 1.1239,
      "step": 77
    },
    {
      "epoch": 0.36879432624113473,
      "grad_norm": 0.39688640832901,
      "learning_rate": 7.454970404866611e-05,
      "loss": 1.1401,
      "step": 78
    },
    {
      "epoch": 0.3735224586288416,
      "grad_norm": 0.3946040868759155,
      "learning_rate": 7.386932700255636e-05,
      "loss": 1.1358,
      "step": 79
    },
    {
      "epoch": 0.37825059101654845,
      "grad_norm": 0.43118810653686523,
      "learning_rate": 7.318317659926637e-05,
      "loss": 1.0864,
      "step": 80
    },
    {
      "epoch": 0.3829787234042553,
      "grad_norm": 0.37678292393684387,
      "learning_rate": 7.24914188003818e-05,
      "loss": 1.0901,
      "step": 81
    },
    {
      "epoch": 0.3877068557919622,
      "grad_norm": 0.41417282819747925,
      "learning_rate": 7.179422092376856e-05,
      "loss": 1.1902,
      "step": 82
    },
    {
      "epoch": 0.39243498817966904,
      "grad_norm": 0.395439475774765,
      "learning_rate": 7.109175160310312e-05,
      "loss": 1.1476,
      "step": 83
    },
    {
      "epoch": 0.3971631205673759,
      "grad_norm": 0.36730679869651794,
      "learning_rate": 7.038418074708444e-05,
      "loss": 1.1107,
      "step": 84
    },
    {
      "epoch": 0.40189125295508277,
      "grad_norm": 0.37374141812324524,
      "learning_rate": 6.967167949833763e-05,
      "loss": 1.1142,
      "step": 85
    },
    {
      "epoch": 0.4066193853427896,
      "grad_norm": 0.3955516815185547,
      "learning_rate": 6.895442019201897e-05,
      "loss": 1.0354,
      "step": 86
    },
    {
      "epoch": 0.41134751773049644,
      "grad_norm": 0.4088446795940399,
      "learning_rate": 6.823257631413276e-05,
      "loss": 1.1146,
      "step": 87
    },
    {
      "epoch": 0.4160756501182033,
      "grad_norm": 0.40435466170310974,
      "learning_rate": 6.750632245956953e-05,
      "loss": 1.1411,
      "step": 88
    },
    {
      "epoch": 0.42080378250591016,
      "grad_norm": 0.38119205832481384,
      "learning_rate": 6.677583428987625e-05,
      "loss": 1.0443,
      "step": 89
    },
    {
      "epoch": 0.425531914893617,
      "grad_norm": 0.39904236793518066,
      "learning_rate": 6.604128849076838e-05,
      "loss": 1.1301,
      "step": 90
    },
    {
      "epoch": 0.4302600472813239,
      "grad_norm": 0.39689844846725464,
      "learning_rate": 6.530286272939437e-05,
      "loss": 1.0686,
      "step": 91
    },
    {
      "epoch": 0.43498817966903075,
      "grad_norm": 0.4293765127658844,
      "learning_rate": 6.456073561136261e-05,
      "loss": 1.0763,
      "step": 92
    },
    {
      "epoch": 0.4397163120567376,
      "grad_norm": 0.4394353926181793,
      "learning_rate": 6.381508663754153e-05,
      "loss": 1.1274,
      "step": 93
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 0.4373772442340851,
      "learning_rate": 6.306609616064304e-05,
      "loss": 1.1152,
      "step": 94
    },
    {
      "epoch": 0.4491725768321513,
      "grad_norm": 0.4275088906288147,
      "learning_rate": 6.231394534160008e-05,
      "loss": 1.0739,
      "step": 95
    },
    {
      "epoch": 0.45390070921985815,
      "grad_norm": 0.49015218019485474,
      "learning_rate": 6.15588161057485e-05,
      "loss": 1.1201,
      "step": 96
    },
    {
      "epoch": 0.458628841607565,
      "grad_norm": 0.46768057346343994,
      "learning_rate": 6.0800891098824186e-05,
      "loss": 1.1138,
      "step": 97
    },
    {
      "epoch": 0.46335697399527187,
      "grad_norm": 0.47101691365242004,
      "learning_rate": 6.004035364278593e-05,
      "loss": 1.1192,
      "step": 98
    },
    {
      "epoch": 0.46808510638297873,
      "grad_norm": 0.4808042347431183,
      "learning_rate": 5.9277387691474676e-05,
      "loss": 1.0965,
      "step": 99
    },
    {
      "epoch": 0.4728132387706856,
      "grad_norm": 0.7809662222862244,
      "learning_rate": 5.851217778611994e-05,
      "loss": 1.2548,
      "step": 100
    },
    {
      "epoch": 0.4728132387706856,
      "eval_loss": 1.1545641422271729,
      "eval_runtime": 38.546,
      "eval_samples_per_second": 9.262,
      "eval_steps_per_second": 2.335,
      "step": 100
    },
    {
      "epoch": 0.47754137115839246,
      "grad_norm": 0.6622450351715088,
      "learning_rate": 5.774490901070424e-05,
      "loss": 1.2761,
      "step": 101
    },
    {
      "epoch": 0.48226950354609927,
      "grad_norm": 0.49634912610054016,
      "learning_rate": 5.697576694719616e-05,
      "loss": 1.2634,
      "step": 102
    },
    {
      "epoch": 0.48699763593380613,
      "grad_norm": 0.41559910774230957,
      "learning_rate": 5.620493763066297e-05,
      "loss": 1.2799,
      "step": 103
    },
    {
      "epoch": 0.491725768321513,
      "grad_norm": 0.38408902287483215,
      "learning_rate": 5.543260750427373e-05,
      "loss": 1.1719,
      "step": 104
    },
    {
      "epoch": 0.49645390070921985,
      "grad_norm": 0.3431536555290222,
      "learning_rate": 5.465896337420359e-05,
      "loss": 1.1056,
      "step": 105
    },
    {
      "epoch": 0.5011820330969267,
      "grad_norm": 0.342176228761673,
      "learning_rate": 5.3884192364450325e-05,
      "loss": 1.2039,
      "step": 106
    },
    {
      "epoch": 0.5059101654846335,
      "grad_norm": 0.37251392006874084,
      "learning_rate": 5.3108481871574036e-05,
      "loss": 1.223,
      "step": 107
    },
    {
      "epoch": 0.5106382978723404,
      "grad_norm": 0.3797588646411896,
      "learning_rate": 5.233201951937088e-05,
      "loss": 1.1501,
      "step": 108
    },
    {
      "epoch": 0.5153664302600472,
      "grad_norm": 0.3910278081893921,
      "learning_rate": 5.155499311349185e-05,
      "loss": 1.2336,
      "step": 109
    },
    {
      "epoch": 0.5200945626477541,
      "grad_norm": 0.3525397479534149,
      "learning_rate": 5.077759059601755e-05,
      "loss": 1.1213,
      "step": 110
    },
    {
      "epoch": 0.524822695035461,
      "grad_norm": 0.3806985020637512,
      "learning_rate": 5e-05,
      "loss": 1.2013,
      "step": 111
    },
    {
      "epoch": 0.5295508274231678,
      "grad_norm": 0.38082411885261536,
      "learning_rate": 4.9222409403982453e-05,
      "loss": 1.1766,
      "step": 112
    },
    {
      "epoch": 0.5342789598108747,
      "grad_norm": 0.35277897119522095,
      "learning_rate": 4.844500688650816e-05,
      "loss": 1.1419,
      "step": 113
    },
    {
      "epoch": 0.5390070921985816,
      "grad_norm": 0.3725706934928894,
      "learning_rate": 4.7667980480629124e-05,
      "loss": 1.1276,
      "step": 114
    },
    {
      "epoch": 0.5437352245862884,
      "grad_norm": 0.3541959226131439,
      "learning_rate": 4.6891518128425976e-05,
      "loss": 1.0653,
      "step": 115
    },
    {
      "epoch": 0.5484633569739953,
      "grad_norm": 0.371341347694397,
      "learning_rate": 4.611580763554969e-05,
      "loss": 1.1224,
      "step": 116
    },
    {
      "epoch": 0.5531914893617021,
      "grad_norm": 0.38338702917099,
      "learning_rate": 4.534103662579642e-05,
      "loss": 1.1465,
      "step": 117
    },
    {
      "epoch": 0.557919621749409,
      "grad_norm": 0.3807014226913452,
      "learning_rate": 4.456739249572627e-05,
      "loss": 1.1659,
      "step": 118
    },
    {
      "epoch": 0.5626477541371159,
      "grad_norm": 0.38342806696891785,
      "learning_rate": 4.3795062369337034e-05,
      "loss": 1.1965,
      "step": 119
    },
    {
      "epoch": 0.5673758865248227,
      "grad_norm": 0.39216044545173645,
      "learning_rate": 4.3024233052803855e-05,
      "loss": 1.2172,
      "step": 120
    },
    {
      "epoch": 0.5721040189125296,
      "grad_norm": 0.383404016494751,
      "learning_rate": 4.2255090989295764e-05,
      "loss": 1.1739,
      "step": 121
    },
    {
      "epoch": 0.5768321513002365,
      "grad_norm": 0.38151639699935913,
      "learning_rate": 4.148782221388007e-05,
      "loss": 1.0698,
      "step": 122
    },
    {
      "epoch": 0.5815602836879432,
      "grad_norm": 0.35033509135246277,
      "learning_rate": 4.0722612308525335e-05,
      "loss": 1.121,
      "step": 123
    },
    {
      "epoch": 0.5862884160756501,
      "grad_norm": 0.36325815320014954,
      "learning_rate": 3.9959646357214084e-05,
      "loss": 1.0989,
      "step": 124
    },
    {
      "epoch": 0.5910165484633569,
      "grad_norm": 0.38052505254745483,
      "learning_rate": 3.919910890117584e-05,
      "loss": 1.0776,
      "step": 125
    },
    {
      "epoch": 0.5957446808510638,
      "grad_norm": 0.3674011826515198,
      "learning_rate": 3.844118389425153e-05,
      "loss": 1.12,
      "step": 126
    },
    {
      "epoch": 0.6004728132387707,
      "grad_norm": 0.3708929717540741,
      "learning_rate": 3.7686054658399935e-05,
      "loss": 1.1564,
      "step": 127
    },
    {
      "epoch": 0.6052009456264775,
      "grad_norm": 0.37105265259742737,
      "learning_rate": 3.693390383935698e-05,
      "loss": 1.1331,
      "step": 128
    },
    {
      "epoch": 0.6099290780141844,
      "grad_norm": 0.38267219066619873,
      "learning_rate": 3.618491336245849e-05,
      "loss": 1.0379,
      "step": 129
    },
    {
      "epoch": 0.6146572104018913,
      "grad_norm": 0.3863324522972107,
      "learning_rate": 3.5439264388637405e-05,
      "loss": 1.1361,
      "step": 130
    },
    {
      "epoch": 0.6193853427895981,
      "grad_norm": 0.3742581307888031,
      "learning_rate": 3.469713727060564e-05,
      "loss": 1.0579,
      "step": 131
    },
    {
      "epoch": 0.624113475177305,
      "grad_norm": 0.3845396041870117,
      "learning_rate": 3.395871150923163e-05,
      "loss": 1.0839,
      "step": 132
    },
    {
      "epoch": 0.6288416075650118,
      "grad_norm": 0.4115036129951477,
      "learning_rate": 3.3224165710123756e-05,
      "loss": 1.1757,
      "step": 133
    },
    {
      "epoch": 0.6335697399527187,
      "grad_norm": 0.39382243156433105,
      "learning_rate": 3.249367754043047e-05,
      "loss": 1.1352,
      "step": 134
    },
    {
      "epoch": 0.6382978723404256,
      "grad_norm": 0.39766818284988403,
      "learning_rate": 3.176742368586725e-05,
      "loss": 1.1353,
      "step": 135
    },
    {
      "epoch": 0.6430260047281324,
      "grad_norm": 0.3955276310443878,
      "learning_rate": 3.104557980798104e-05,
      "loss": 1.1329,
      "step": 136
    },
    {
      "epoch": 0.6477541371158393,
      "grad_norm": 0.4006507396697998,
      "learning_rate": 3.032832050166239e-05,
      "loss": 1.0925,
      "step": 137
    },
    {
      "epoch": 0.6524822695035462,
      "grad_norm": 0.37227386236190796,
      "learning_rate": 2.9615819252915565e-05,
      "loss": 1.0568,
      "step": 138
    },
    {
      "epoch": 0.6572104018912529,
      "grad_norm": 0.4012226462364197,
      "learning_rate": 2.890824839689689e-05,
      "loss": 1.1103,
      "step": 139
    },
    {
      "epoch": 0.6619385342789598,
      "grad_norm": 0.398710697889328,
      "learning_rate": 2.8205779076231447e-05,
      "loss": 1.086,
      "step": 140
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.38928738236427307,
      "learning_rate": 2.750858119961821e-05,
      "loss": 1.1066,
      "step": 141
    },
    {
      "epoch": 0.6713947990543735,
      "grad_norm": 0.41606059670448303,
      "learning_rate": 2.6816823400733625e-05,
      "loss": 1.1212,
      "step": 142
    },
    {
      "epoch": 0.6761229314420804,
      "grad_norm": 0.4255363345146179,
      "learning_rate": 2.613067299744364e-05,
      "loss": 1.0511,
      "step": 143
    },
    {
      "epoch": 0.6808510638297872,
      "grad_norm": 0.422050416469574,
      "learning_rate": 2.5450295951333896e-05,
      "loss": 1.1598,
      "step": 144
    },
    {
      "epoch": 0.6855791962174941,
      "grad_norm": 0.46749165654182434,
      "learning_rate": 2.4775856827568016e-05,
      "loss": 1.111,
      "step": 145
    },
    {
      "epoch": 0.6903073286052009,
      "grad_norm": 0.4487079083919525,
      "learning_rate": 2.410751875508373e-05,
      "loss": 1.0855,
      "step": 146
    },
    {
      "epoch": 0.6950354609929078,
      "grad_norm": 0.45914727449417114,
      "learning_rate": 2.3445443387136244e-05,
      "loss": 1.0781,
      "step": 147
    },
    {
      "epoch": 0.6997635933806147,
      "grad_norm": 0.4794670045375824,
      "learning_rate": 2.2789790862198628e-05,
      "loss": 1.0906,
      "step": 148
    },
    {
      "epoch": 0.7044917257683215,
      "grad_norm": 0.49251869320869446,
      "learning_rate": 2.2140719765228584e-05,
      "loss": 1.1217,
      "step": 149
    },
    {
      "epoch": 0.7092198581560284,
      "grad_norm": 0.6213652491569519,
      "learning_rate": 2.1498387089310868e-05,
      "loss": 1.1458,
      "step": 150
    },
    {
      "epoch": 0.7092198581560284,
      "eval_loss": 1.1165207624435425,
      "eval_runtime": 38.5292,
      "eval_samples_per_second": 9.266,
      "eval_steps_per_second": 2.336,
      "step": 150
    },
    {
      "epoch": 0.7139479905437353,
      "grad_norm": 0.479676216840744,
      "learning_rate": 2.0862948197684955e-05,
      "loss": 1.2254,
      "step": 151
    },
    {
      "epoch": 0.7186761229314421,
      "grad_norm": 0.4174903631210327,
      "learning_rate": 2.0234556786166715e-05,
      "loss": 1.1599,
      "step": 152
    },
    {
      "epoch": 0.723404255319149,
      "grad_norm": 0.4270111918449402,
      "learning_rate": 1.961336484597343e-05,
      "loss": 1.1773,
      "step": 153
    },
    {
      "epoch": 0.7281323877068558,
      "grad_norm": 0.4060277044773102,
      "learning_rate": 1.899952262696125e-05,
      "loss": 1.2089,
      "step": 154
    },
    {
      "epoch": 0.7328605200945626,
      "grad_norm": 0.40245646238327026,
      "learning_rate": 1.8393178601283683e-05,
      "loss": 1.1942,
      "step": 155
    },
    {
      "epoch": 0.7375886524822695,
      "grad_norm": 0.3960253894329071,
      "learning_rate": 1.7794479427480117e-05,
      "loss": 1.2359,
      "step": 156
    },
    {
      "epoch": 0.7423167848699763,
      "grad_norm": 0.36273330450057983,
      "learning_rate": 1.7203569915003005e-05,
      "loss": 1.1725,
      "step": 157
    },
    {
      "epoch": 0.7470449172576832,
      "grad_norm": 0.38699132204055786,
      "learning_rate": 1.662059298919232e-05,
      "loss": 1.2018,
      "step": 158
    },
    {
      "epoch": 0.75177304964539,
      "grad_norm": 0.3809574544429779,
      "learning_rate": 1.6045689656705716e-05,
      "loss": 1.1198,
      "step": 159
    },
    {
      "epoch": 0.7565011820330969,
      "grad_norm": 0.37179070711135864,
      "learning_rate": 1.5478998971412668e-05,
      "loss": 1.1596,
      "step": 160
    },
    {
      "epoch": 0.7612293144208038,
      "grad_norm": 0.3978999853134155,
      "learning_rate": 1.4920658000761174e-05,
      "loss": 1.1881,
      "step": 161
    },
    {
      "epoch": 0.7659574468085106,
      "grad_norm": 0.37211042642593384,
      "learning_rate": 1.4370801792624656e-05,
      "loss": 1.1375,
      "step": 162
    },
    {
      "epoch": 0.7706855791962175,
      "grad_norm": 0.36233842372894287,
      "learning_rate": 1.3829563342637513e-05,
      "loss": 1.0862,
      "step": 163
    },
    {
      "epoch": 0.7754137115839244,
      "grad_norm": 0.3661029040813446,
      "learning_rate": 1.329707356202699e-05,
      "loss": 1.0632,
      "step": 164
    },
    {
      "epoch": 0.7801418439716312,
      "grad_norm": 0.3897812068462372,
      "learning_rate": 1.2773461245949247e-05,
      "loss": 1.1953,
      "step": 165
    },
    {
      "epoch": 0.7848699763593381,
      "grad_norm": 0.3424377143383026,
      "learning_rate": 1.225885304233716e-05,
      "loss": 1.0391,
      "step": 166
    },
    {
      "epoch": 0.789598108747045,
      "grad_norm": 0.363320916891098,
      "learning_rate": 1.1753373421267621e-05,
      "loss": 1.0834,
      "step": 167
    },
    {
      "epoch": 0.7943262411347518,
      "grad_norm": 0.36856764554977417,
      "learning_rate": 1.125714464485551e-05,
      "loss": 1.0455,
      "step": 168
    },
    {
      "epoch": 0.7990543735224587,
      "grad_norm": 0.36050939559936523,
      "learning_rate": 1.0770286737681701e-05,
      "loss": 1.0576,
      "step": 169
    },
    {
      "epoch": 0.8037825059101655,
      "grad_norm": 0.3826131224632263,
      "learning_rate": 1.0292917457762325e-05,
      "loss": 1.1386,
      "step": 170
    },
    {
      "epoch": 0.8085106382978723,
      "grad_norm": 0.37651652097702026,
      "learning_rate": 9.825152268066213e-06,
      "loss": 1.118,
      "step": 171
    },
    {
      "epoch": 0.8132387706855791,
      "grad_norm": 0.397661030292511,
      "learning_rate": 9.367104308587494e-06,
      "loss": 1.1479,
      "step": 172
    },
    {
      "epoch": 0.817966903073286,
      "grad_norm": 0.37330368161201477,
      "learning_rate": 8.91888436897997e-06,
      "loss": 1.047,
      "step": 173
    },
    {
      "epoch": 0.8226950354609929,
      "grad_norm": 0.3637228012084961,
      "learning_rate": 8.480600861760124e-06,
      "loss": 1.0695,
      "step": 174
    },
    {
      "epoch": 0.8274231678486997,
      "grad_norm": 0.3934190273284912,
      "learning_rate": 8.052359796084951e-06,
      "loss": 1.0716,
      "step": 175
    },
    {
      "epoch": 0.8321513002364066,
      "grad_norm": 0.36225423216819763,
      "learning_rate": 7.63426475211113e-06,
      "loss": 1.0293,
      "step": 176
    },
    {
      "epoch": 0.8368794326241135,
      "grad_norm": 0.3973957598209381,
      "learning_rate": 7.226416855941814e-06,
      "loss": 1.072,
      "step": 177
    },
    {
      "epoch": 0.8416075650118203,
      "grad_norm": 0.42150604724884033,
      "learning_rate": 6.828914755166827e-06,
      "loss": 1.0884,
      "step": 178
    },
    {
      "epoch": 0.8463356973995272,
      "grad_norm": 0.38192811608314514,
      "learning_rate": 6.441854595002477e-06,
      "loss": 1.0185,
      "step": 179
    },
    {
      "epoch": 0.851063829787234,
      "grad_norm": 0.3970332145690918,
      "learning_rate": 6.065329995036573e-06,
      "loss": 1.0495,
      "step": 180
    },
    {
      "epoch": 0.8557919621749409,
      "grad_norm": 0.4090059995651245,
      "learning_rate": 5.699432026584267e-06,
      "loss": 1.0787,
      "step": 181
    },
    {
      "epoch": 0.8605200945626478,
      "grad_norm": 0.3918428421020508,
      "learning_rate": 5.344249190660428e-06,
      "loss": 1.004,
      "step": 182
    },
    {
      "epoch": 0.8652482269503546,
      "grad_norm": 0.3922417163848877,
      "learning_rate": 4.999867396573499e-06,
      "loss": 1.0025,
      "step": 183
    },
    {
      "epoch": 0.8699763593380615,
      "grad_norm": 0.40068820118904114,
      "learning_rate": 4.666369941146376e-06,
      "loss": 1.0989,
      "step": 184
    },
    {
      "epoch": 0.8747044917257684,
      "grad_norm": 0.3978284001350403,
      "learning_rate": 4.343837488569058e-06,
      "loss": 1.0513,
      "step": 185
    },
    {
      "epoch": 0.8794326241134752,
      "grad_norm": 0.4173754155635834,
      "learning_rate": 4.032348050888179e-06,
      "loss": 1.1416,
      "step": 186
    },
    {
      "epoch": 0.8841607565011821,
      "grad_norm": 0.4168488085269928,
      "learning_rate": 3.731976969137929e-06,
      "loss": 1.0955,
      "step": 187
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.4095589816570282,
      "learning_rate": 3.4427968951170288e-06,
      "loss": 1.0602,
      "step": 188
    },
    {
      "epoch": 0.8936170212765957,
      "grad_norm": 0.4083250164985657,
      "learning_rate": 3.1648777738162494e-06,
      "loss": 1.0999,
      "step": 189
    },
    {
      "epoch": 0.8983451536643026,
      "grad_norm": 0.4310641586780548,
      "learning_rate": 2.8982868265005458e-06,
      "loss": 1.1334,
      "step": 190
    },
    {
      "epoch": 0.9030732860520094,
      "grad_norm": 0.4352046847343445,
      "learning_rate": 2.6430885344499946e-06,
      "loss": 1.0898,
      "step": 191
    },
    {
      "epoch": 0.9078014184397163,
      "grad_norm": 0.45793259143829346,
      "learning_rate": 2.399344623363503e-06,
      "loss": 1.053,
      "step": 192
    },
    {
      "epoch": 0.9125295508274232,
      "grad_norm": 0.4551216959953308,
      "learning_rate": 2.1671140484290142e-06,
      "loss": 1.1926,
      "step": 193
    },
    {
      "epoch": 0.91725768321513,
      "grad_norm": 0.4517664313316345,
      "learning_rate": 1.946452980063773e-06,
      "loss": 1.0751,
      "step": 194
    },
    {
      "epoch": 0.9219858156028369,
      "grad_norm": 0.48062533140182495,
      "learning_rate": 1.7374147903282178e-06,
      "loss": 1.17,
      "step": 195
    },
    {
      "epoch": 0.9267139479905437,
      "grad_norm": 0.4554964005947113,
      "learning_rate": 1.5400500400166939e-06,
      "loss": 1.129,
      "step": 196
    },
    {
      "epoch": 0.9314420803782506,
      "grad_norm": 0.47884511947631836,
      "learning_rate": 1.3544064664281265e-06,
      "loss": 1.1257,
      "step": 197
    },
    {
      "epoch": 0.9361702127659575,
      "grad_norm": 0.48259878158569336,
      "learning_rate": 1.18052897181965e-06,
      "loss": 1.1116,
      "step": 198
    },
    {
      "epoch": 0.9408983451536643,
      "grad_norm": 0.4733264446258545,
      "learning_rate": 1.0184596125459135e-06,
      "loss": 1.1168,
      "step": 199
    },
    {
      "epoch": 0.9456264775413712,
      "grad_norm": 0.6054824590682983,
      "learning_rate": 8.682375888868166e-07,
      "loss": 1.2112,
      "step": 200
    },
    {
      "epoch": 0.9456264775413712,
      "eval_loss": 1.1042157411575317,
      "eval_runtime": 38.4897,
      "eval_samples_per_second": 9.275,
      "eval_steps_per_second": 2.338,
      "step": 200
    },
    {
      "epoch": 0.950354609929078,
      "grad_norm": 0.3739728331565857,
      "learning_rate": 7.29899235565934e-07,
      "loss": 1.1624,
      "step": 201
    },
    {
      "epoch": 0.9550827423167849,
      "grad_norm": 0.35657617449760437,
      "learning_rate": 6.034780129621664e-07,
      "loss": 1.0881,
      "step": 202
    },
    {
      "epoch": 0.9598108747044918,
      "grad_norm": 0.37702855467796326,
      "learning_rate": 4.890044990165321e-07,
      "loss": 1.1991,
      "step": 203
    },
    {
      "epoch": 0.9645390070921985,
      "grad_norm": 0.3940827250480652,
      "learning_rate": 3.8650638183617694e-07,
      "loss": 1.1217,
      "step": 204
    },
    {
      "epoch": 0.9692671394799054,
      "grad_norm": 0.3676983118057251,
      "learning_rate": 2.9600845299737056e-07,
      "loss": 1.0818,
      "step": 205
    },
    {
      "epoch": 0.9739952718676123,
      "grad_norm": 0.399383008480072,
      "learning_rate": 2.1753260154906973e-07,
      "loss": 1.0898,
      "step": 206
    },
    {
      "epoch": 0.9787234042553191,
      "grad_norm": 0.3540259599685669,
      "learning_rate": 1.5109780871853663e-07,
      "loss": 1.0337,
      "step": 207
    },
    {
      "epoch": 0.983451536643026,
      "grad_norm": 0.3893062174320221,
      "learning_rate": 9.672014332028356e-08,
      "loss": 0.9161,
      "step": 208
    },
    {
      "epoch": 0.9881796690307328,
      "grad_norm": 0.39612168073654175,
      "learning_rate": 5.4412757869459763e-08,
      "loss": 1.0683,
      "step": 209
    },
    {
      "epoch": 0.9929078014184397,
      "grad_norm": 0.443149209022522,
      "learning_rate": 2.4185885400596075e-08,
      "loss": 1.0856,
      "step": 210
    },
    {
      "epoch": 0.9976359338061466,
      "grad_norm": 0.46631062030792236,
      "learning_rate": 6.04683699252373e-09,
      "loss": 1.101,
      "step": 211
    },
    {
      "epoch": 1.0023640661938533,
      "grad_norm": 0.9580826759338379,
      "learning_rate": 0.0,
      "loss": 1.865,
      "step": 212
    }
  ],
  "logging_steps": 1,
  "max_steps": 212,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.8356191188942848e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}