{ "best_metric": 0.6475752433152033, "best_model_checkpoint": "./runtime-masked/MiniLMv2-L6-H384-distilled-from-RoBERTa-Large-finetuned-wikitext103-mlm-multi-emails-hq-x2bs/checkpoint-4004", "epoch": 16.0, "global_step": 4928, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.4291497975708505e-06, "loss": 7.2679, "step": 3 }, { "epoch": 0.02, "learning_rate": 4.858299595141701e-06, "loss": 7.1451, "step": 6 }, { "epoch": 0.03, "learning_rate": 7.287449392712551e-06, "loss": 7.1002, "step": 9 }, { "epoch": 0.04, "learning_rate": 9.716599190283402e-06, "loss": 6.9508, "step": 12 }, { "epoch": 0.05, "learning_rate": 1.2145748987854251e-05, "loss": 6.7874, "step": 15 }, { "epoch": 0.06, "learning_rate": 1.4574898785425101e-05, "loss": 6.6279, "step": 18 }, { "epoch": 0.07, "learning_rate": 1.7004048582995952e-05, "loss": 6.5254, "step": 21 }, { "epoch": 0.08, "learning_rate": 1.9433198380566804e-05, "loss": 6.3327, "step": 24 }, { "epoch": 0.09, "learning_rate": 2.1862348178137653e-05, "loss": 6.2761, "step": 27 }, { "epoch": 0.1, "learning_rate": 2.4291497975708502e-05, "loss": 6.0968, "step": 30 }, { "epoch": 0.11, "learning_rate": 2.6720647773279357e-05, "loss": 5.9384, "step": 33 }, { "epoch": 0.12, "learning_rate": 2.9149797570850203e-05, "loss": 5.8496, "step": 36 }, { "epoch": 0.13, "learning_rate": 3.157894736842105e-05, "loss": 5.7136, "step": 39 }, { "epoch": 0.14, "learning_rate": 3.4008097165991904e-05, "loss": 5.6149, "step": 42 }, { "epoch": 0.15, "learning_rate": 3.6437246963562756e-05, "loss": 5.5674, "step": 45 }, { "epoch": 0.16, "learning_rate": 3.886639676113361e-05, "loss": 5.489, "step": 48 }, { "epoch": 0.17, "learning_rate": 4.1295546558704454e-05, "loss": 5.3851, "step": 51 }, { "epoch": 0.18, "learning_rate": 4.3724696356275306e-05, "loss": 5.3135, "step": 54 }, { "epoch": 0.19, "learning_rate": 4.615384615384616e-05, "loss": 5.1979, "step": 57 }, { "epoch": 0.19, "learning_rate": 4.8582995951417004e-05, "loss": 5.0876, "step": 60 }, { "epoch": 0.2, "learning_rate": 5.101214574898786e-05, "loss": 5.094, "step": 63 }, { "epoch": 0.21, "learning_rate": 5.3441295546558715e-05, "loss": 5.0148, "step": 66 }, { "epoch": 0.22, "learning_rate": 5.587044534412956e-05, "loss": 4.9376, "step": 69 }, { "epoch": 0.23, "learning_rate": 5.8299595141700406e-05, "loss": 4.9033, "step": 72 }, { "epoch": 0.24, "learning_rate": 6.072874493927125e-05, "loss": 4.8783, "step": 75 }, { "epoch": 0.25, "learning_rate": 6.31578947368421e-05, "loss": 4.8382, "step": 78 }, { "epoch": 0.26, "learning_rate": 6.558704453441296e-05, "loss": 4.7009, "step": 81 }, { "epoch": 0.27, "learning_rate": 6.801619433198381e-05, "loss": 4.6597, "step": 84 }, { "epoch": 0.28, "learning_rate": 7.044534412955465e-05, "loss": 4.5674, "step": 87 }, { "epoch": 0.29, "learning_rate": 7.287449392712551e-05, "loss": 4.5938, "step": 90 }, { "epoch": 0.3, "learning_rate": 7.530364372469636e-05, "loss": 4.6061, "step": 93 }, { "epoch": 0.31, "learning_rate": 7.773279352226722e-05, "loss": 4.582, "step": 96 }, { "epoch": 0.32, "learning_rate": 8.016194331983806e-05, "loss": 4.4975, "step": 99 }, { "epoch": 0.33, "learning_rate": 8.259109311740891e-05, "loss": 4.3876, "step": 102 }, { "epoch": 0.34, "learning_rate": 8.502024291497977e-05, "loss": 4.388, "step": 105 }, { "epoch": 0.35, "learning_rate": 8.744939271255061e-05, "loss": 4.2698, "step": 108 }, { "epoch": 0.36, "learning_rate": 8.987854251012147e-05, "loss": 4.3306, "step": 111 }, { "epoch": 0.37, "learning_rate": 9.230769230769232e-05, "loss": 4.3391, "step": 114 }, { "epoch": 0.38, "learning_rate": 9.473684210526316e-05, "loss": 4.1981, "step": 117 }, { "epoch": 0.39, "learning_rate": 9.716599190283401e-05, "loss": 4.3052, "step": 120 }, { "epoch": 0.4, "learning_rate": 9.959514170040485e-05, "loss": 4.1384, "step": 123 }, { "epoch": 0.41, "learning_rate": 0.00010202429149797573, "loss": 4.159, "step": 126 }, { "epoch": 0.42, "learning_rate": 0.00010445344129554657, "loss": 4.1178, "step": 129 }, { "epoch": 0.43, "learning_rate": 0.00010688259109311743, "loss": 4.0929, "step": 132 }, { "epoch": 0.44, "learning_rate": 0.00010931174089068827, "loss": 4.0761, "step": 135 }, { "epoch": 0.45, "learning_rate": 0.00011174089068825912, "loss": 3.9875, "step": 138 }, { "epoch": 0.46, "learning_rate": 0.00011417004048582995, "loss": 4.0039, "step": 141 }, { "epoch": 0.47, "learning_rate": 0.00011659919028340081, "loss": 3.9917, "step": 144 }, { "epoch": 0.48, "learning_rate": 0.00011902834008097166, "loss": 4.0101, "step": 147 }, { "epoch": 0.49, "learning_rate": 0.0001214574898785425, "loss": 3.9108, "step": 150 }, { "epoch": 0.5, "learning_rate": 0.00012388663967611335, "loss": 3.9445, "step": 153 }, { "epoch": 0.51, "learning_rate": 0.0001263157894736842, "loss": 3.96, "step": 156 }, { "epoch": 0.52, "learning_rate": 0.00012874493927125507, "loss": 3.9475, "step": 159 }, { "epoch": 0.53, "learning_rate": 0.00013117408906882592, "loss": 3.8582, "step": 162 }, { "epoch": 0.54, "learning_rate": 0.00013360323886639676, "loss": 3.8952, "step": 165 }, { "epoch": 0.55, "learning_rate": 0.00013603238866396762, "loss": 3.7632, "step": 168 }, { "epoch": 0.56, "learning_rate": 0.00013846153846153847, "loss": 3.7845, "step": 171 }, { "epoch": 0.56, "learning_rate": 0.0001408906882591093, "loss": 3.7638, "step": 174 }, { "epoch": 0.57, "learning_rate": 0.00014331983805668017, "loss": 3.8404, "step": 177 }, { "epoch": 0.58, "learning_rate": 0.00014574898785425102, "loss": 3.7742, "step": 180 }, { "epoch": 0.59, "learning_rate": 0.00014817813765182186, "loss": 3.7533, "step": 183 }, { "epoch": 0.6, "learning_rate": 0.00015060728744939272, "loss": 3.7303, "step": 186 }, { "epoch": 0.61, "learning_rate": 0.00015303643724696357, "loss": 3.7195, "step": 189 }, { "epoch": 0.62, "learning_rate": 0.00015546558704453443, "loss": 3.7544, "step": 192 }, { "epoch": 0.63, "learning_rate": 0.00015789473684210527, "loss": 3.6913, "step": 195 }, { "epoch": 0.64, "learning_rate": 0.00016032388663967612, "loss": 3.7917, "step": 198 }, { "epoch": 0.65, "learning_rate": 0.00016275303643724698, "loss": 3.6758, "step": 201 }, { "epoch": 0.66, "learning_rate": 0.00016518218623481781, "loss": 3.6774, "step": 204 }, { "epoch": 0.67, "learning_rate": 0.00016761133603238867, "loss": 3.6199, "step": 207 }, { "epoch": 0.68, "learning_rate": 0.00017004048582995953, "loss": 3.6028, "step": 210 }, { "epoch": 0.69, "learning_rate": 0.0001724696356275304, "loss": 3.6084, "step": 213 }, { "epoch": 0.7, "learning_rate": 0.00017489878542510122, "loss": 3.6165, "step": 216 }, { "epoch": 0.71, "learning_rate": 0.00017732793522267208, "loss": 3.5123, "step": 219 }, { "epoch": 0.72, "learning_rate": 0.00017975708502024294, "loss": 3.5594, "step": 222 }, { "epoch": 0.73, "learning_rate": 0.00018218623481781377, "loss": 3.6238, "step": 225 }, { "epoch": 0.74, "learning_rate": 0.00018461538461538463, "loss": 3.4991, "step": 228 }, { "epoch": 0.75, "learning_rate": 0.0001870445344129555, "loss": 3.5384, "step": 231 }, { "epoch": 0.76, "learning_rate": 0.00018947368421052632, "loss": 3.5282, "step": 234 }, { "epoch": 0.77, "learning_rate": 0.00019190283400809716, "loss": 3.574, "step": 237 }, { "epoch": 0.78, "learning_rate": 0.00019433198380566801, "loss": 3.5391, "step": 240 }, { "epoch": 0.79, "learning_rate": 0.00019676113360323887, "loss": 3.4529, "step": 243 }, { "epoch": 0.8, "learning_rate": 0.0001991902834008097, "loss": 3.4957, "step": 246 }, { "epoch": 0.81, "learning_rate": 0.00019999990991501854, "loss": 3.4346, "step": 249 }, { "epoch": 0.82, "learning_rate": 0.00019999943696930958, "loss": 3.4838, "step": 252 }, { "epoch": 0.83, "learning_rate": 0.00019999855864354245, "loss": 3.4721, "step": 255 }, { "epoch": 0.84, "learning_rate": 0.0001999972749412778, "loss": 3.5, "step": 258 }, { "epoch": 0.85, "learning_rate": 0.00019999558586771948, "loss": 3.3624, "step": 261 }, { "epoch": 0.86, "learning_rate": 0.00019999349142971467, "loss": 3.4138, "step": 264 }, { "epoch": 0.87, "learning_rate": 0.00019999099163575389, "loss": 3.4005, "step": 267 }, { "epoch": 0.88, "learning_rate": 0.00019998808649597085, "loss": 3.365, "step": 270 }, { "epoch": 0.89, "learning_rate": 0.0001999847760221425, "loss": 3.3424, "step": 273 }, { "epoch": 0.9, "learning_rate": 0.00019998106022768887, "loss": 3.3629, "step": 276 }, { "epoch": 0.91, "learning_rate": 0.00019997693912767318, "loss": 3.3722, "step": 279 }, { "epoch": 0.92, "learning_rate": 0.00019997241273880158, "loss": 3.3951, "step": 282 }, { "epoch": 0.93, "learning_rate": 0.00019996748107942335, "loss": 3.3817, "step": 285 }, { "epoch": 0.94, "learning_rate": 0.00019996214416953046, "loss": 3.3289, "step": 288 }, { "epoch": 0.94, "learning_rate": 0.00019995640203075788, "loss": 3.3074, "step": 291 }, { "epoch": 0.95, "learning_rate": 0.00019995025468638318, "loss": 3.3145, "step": 294 }, { "epoch": 0.96, "learning_rate": 0.00019994370216132662, "loss": 3.2853, "step": 297 }, { "epoch": 0.97, "learning_rate": 0.000199936744482151, "loss": 3.2416, "step": 300 }, { "epoch": 0.98, "learning_rate": 0.0001999293816770615, "loss": 3.2565, "step": 303 }, { "epoch": 0.99, "learning_rate": 0.00019992161377590563, "loss": 3.2947, "step": 306 }, { "epoch": 1.0, "eval_accuracy": 0.5121698756686252, "eval_loss": 3.0832247734069824, "eval_runtime": 16.2528, "eval_samples_per_second": 135.177, "eval_steps_per_second": 67.619, "step": 308 }, { "epoch": 1.0, "learning_rate": 0.0001999134408101731, "loss": 3.2464, "step": 309 }, { "epoch": 1.01, "learning_rate": 0.00019990486281299568, "loss": 3.2509, "step": 312 }, { "epoch": 1.02, "learning_rate": 0.00019989587981914704, "loss": 3.284, "step": 315 }, { "epoch": 1.03, "learning_rate": 0.00019988649186504262, "loss": 3.1894, "step": 318 }, { "epoch": 1.04, "learning_rate": 0.0001998766989887396, "loss": 3.3045, "step": 321 }, { "epoch": 1.05, "learning_rate": 0.0001998665012299365, "loss": 3.2935, "step": 324 }, { "epoch": 1.06, "learning_rate": 0.0001998558986299733, "loss": 3.1808, "step": 327 }, { "epoch": 1.07, "learning_rate": 0.000199844891231831, "loss": 3.1295, "step": 330 }, { "epoch": 1.08, "learning_rate": 0.00019983347908013172, "loss": 3.262, "step": 333 }, { "epoch": 1.09, "learning_rate": 0.00019982166222113826, "loss": 3.1685, "step": 336 }, { "epoch": 1.1, "learning_rate": 0.00019980944070275406, "loss": 3.1682, "step": 339 }, { "epoch": 1.11, "learning_rate": 0.00019979681457452304, "loss": 3.2196, "step": 342 }, { "epoch": 1.12, "learning_rate": 0.0001997837838876293, "loss": 3.1117, "step": 345 }, { "epoch": 1.13, "learning_rate": 0.0001997703486948969, "loss": 3.2077, "step": 348 }, { "epoch": 1.14, "learning_rate": 0.00019975650905078976, "loss": 3.1355, "step": 351 }, { "epoch": 1.15, "learning_rate": 0.00019974226501141137, "loss": 3.2277, "step": 354 }, { "epoch": 1.16, "learning_rate": 0.00019972761663450452, "loss": 3.14, "step": 357 }, { "epoch": 1.17, "learning_rate": 0.0001997125639794512, "loss": 3.1937, "step": 360 }, { "epoch": 1.18, "learning_rate": 0.00019969710710727214, "loss": 3.1441, "step": 363 }, { "epoch": 1.19, "learning_rate": 0.00019968124608062682, "loss": 3.1193, "step": 366 }, { "epoch": 1.2, "learning_rate": 0.000199664980963813, "loss": 3.1849, "step": 369 }, { "epoch": 1.21, "learning_rate": 0.00019964831182276663, "loss": 3.1189, "step": 372 }, { "epoch": 1.22, "learning_rate": 0.00019963123872506147, "loss": 3.0733, "step": 375 }, { "epoch": 1.23, "learning_rate": 0.0001996137617399088, "loss": 3.101, "step": 378 }, { "epoch": 1.24, "learning_rate": 0.00019959588093815728, "loss": 3.074, "step": 381 }, { "epoch": 1.25, "learning_rate": 0.00019957759639229247, "loss": 3.1142, "step": 384 }, { "epoch": 1.26, "learning_rate": 0.00019955890817643674, "loss": 3.1246, "step": 387 }, { "epoch": 1.27, "learning_rate": 0.0001995398163663488, "loss": 3.104, "step": 390 }, { "epoch": 1.28, "learning_rate": 0.00019952032103942347, "loss": 3.1105, "step": 393 }, { "epoch": 1.29, "learning_rate": 0.0001995004222746913, "loss": 3.0985, "step": 396 }, { "epoch": 1.3, "learning_rate": 0.00019948012015281853, "loss": 3.1341, "step": 399 }, { "epoch": 1.31, "learning_rate": 0.00019945941475610623, "loss": 3.0335, "step": 402 }, { "epoch": 1.31, "learning_rate": 0.0001994383061684905, "loss": 3.0978, "step": 405 }, { "epoch": 1.32, "learning_rate": 0.00019941679447554175, "loss": 3.0737, "step": 408 }, { "epoch": 1.33, "learning_rate": 0.00019939487976446468, "loss": 3.0942, "step": 411 }, { "epoch": 1.34, "learning_rate": 0.00019937256212409756, "loss": 3.0471, "step": 414 }, { "epoch": 1.35, "learning_rate": 0.00019934984164491227, "loss": 3.0165, "step": 417 }, { "epoch": 1.36, "learning_rate": 0.00019932671841901354, "loss": 2.9886, "step": 420 }, { "epoch": 1.37, "learning_rate": 0.00019930319254013887, "loss": 3.0629, "step": 423 }, { "epoch": 1.38, "learning_rate": 0.000199279264103658, "loss": 3.0518, "step": 426 }, { "epoch": 1.39, "learning_rate": 0.00019925493320657262, "loss": 2.9858, "step": 429 }, { "epoch": 1.4, "learning_rate": 0.00019923019994751585, "loss": 3.0696, "step": 432 }, { "epoch": 1.41, "learning_rate": 0.000199205064426752, "loss": 3.1251, "step": 435 }, { "epoch": 1.42, "learning_rate": 0.000199179526746176, "loss": 3.0311, "step": 438 }, { "epoch": 1.43, "learning_rate": 0.00019915358700931313, "loss": 2.9571, "step": 441 }, { "epoch": 1.44, "learning_rate": 0.00019912724532131847, "loss": 2.9914, "step": 444 }, { "epoch": 1.45, "learning_rate": 0.00019910050178897657, "loss": 2.9803, "step": 447 }, { "epoch": 1.46, "learning_rate": 0.00019907335652070103, "loss": 3.0183, "step": 450 }, { "epoch": 1.47, "learning_rate": 0.0001990458096265339, "loss": 3.0207, "step": 453 }, { "epoch": 1.48, "learning_rate": 0.00019901786121814547, "loss": 2.9883, "step": 456 }, { "epoch": 1.49, "learning_rate": 0.00019898951140883369, "loss": 2.924, "step": 459 }, { "epoch": 1.5, "learning_rate": 0.0001989607603135236, "loss": 3.0618, "step": 462 }, { "epoch": 1.51, "learning_rate": 0.00019893160804876708, "loss": 3.0179, "step": 465 }, { "epoch": 1.52, "learning_rate": 0.00019890205473274236, "loss": 2.9295, "step": 468 }, { "epoch": 1.53, "learning_rate": 0.00019887210048525323, "loss": 2.9724, "step": 471 }, { "epoch": 1.54, "learning_rate": 0.00019884174542772899, "loss": 2.9413, "step": 474 }, { "epoch": 1.55, "learning_rate": 0.00019881098968322367, "loss": 3.0484, "step": 477 }, { "epoch": 1.56, "learning_rate": 0.00019877983337641565, "loss": 2.9098, "step": 480 }, { "epoch": 1.57, "learning_rate": 0.00019874827663360706, "loss": 2.9568, "step": 483 }, { "epoch": 1.58, "learning_rate": 0.00019871631958272336, "loss": 2.9348, "step": 486 }, { "epoch": 1.59, "learning_rate": 0.00019868396235331282, "loss": 2.9615, "step": 489 }, { "epoch": 1.6, "learning_rate": 0.00019865120507654593, "loss": 2.9036, "step": 492 }, { "epoch": 1.61, "learning_rate": 0.00019861804788521493, "loss": 2.8977, "step": 495 }, { "epoch": 1.62, "learning_rate": 0.00019858449091373313, "loss": 3.0531, "step": 498 }, { "epoch": 1.63, "learning_rate": 0.00019855053429813463, "loss": 2.9548, "step": 501 }, { "epoch": 1.64, "learning_rate": 0.00019851617817607354, "loss": 2.9541, "step": 504 }, { "epoch": 1.65, "learning_rate": 0.00019848142268682356, "loss": 2.8871, "step": 507 }, { "epoch": 1.66, "learning_rate": 0.00019844626797127724, "loss": 2.8821, "step": 510 }, { "epoch": 1.67, "learning_rate": 0.00019841071417194561, "loss": 2.9179, "step": 513 }, { "epoch": 1.68, "learning_rate": 0.00019837476143295748, "loss": 2.9251, "step": 516 }, { "epoch": 1.69, "learning_rate": 0.00019833840990005893, "loss": 2.8764, "step": 519 }, { "epoch": 1.69, "learning_rate": 0.00019830165972061265, "loss": 2.8817, "step": 522 }, { "epoch": 1.7, "learning_rate": 0.00019826451104359738, "loss": 2.8707, "step": 525 }, { "epoch": 1.71, "learning_rate": 0.00019822696401960727, "loss": 2.8489, "step": 528 }, { "epoch": 1.72, "learning_rate": 0.00019818901880085137, "loss": 2.9285, "step": 531 }, { "epoch": 1.73, "learning_rate": 0.00019815067554115282, "loss": 2.8707, "step": 534 }, { "epoch": 1.74, "learning_rate": 0.0001981119343959485, "loss": 2.8615, "step": 537 }, { "epoch": 1.75, "learning_rate": 0.00019807279552228816, "loss": 2.8273, "step": 540 }, { "epoch": 1.76, "learning_rate": 0.00019803325907883385, "loss": 2.9113, "step": 543 }, { "epoch": 1.77, "learning_rate": 0.00019799332522585936, "loss": 2.8561, "step": 546 }, { "epoch": 1.78, "learning_rate": 0.00019795299412524945, "loss": 2.8562, "step": 549 }, { "epoch": 1.79, "learning_rate": 0.00019791226594049932, "loss": 2.8861, "step": 552 }, { "epoch": 1.8, "learning_rate": 0.00019787114083671375, "loss": 2.8196, "step": 555 }, { "epoch": 1.81, "learning_rate": 0.00019782961898060677, "loss": 2.8737, "step": 558 }, { "epoch": 1.82, "learning_rate": 0.00019778770054050058, "loss": 2.9101, "step": 561 }, { "epoch": 1.83, "learning_rate": 0.00019774538568632515, "loss": 2.9066, "step": 564 }, { "epoch": 1.84, "learning_rate": 0.00019770267458961741, "loss": 2.8889, "step": 567 }, { "epoch": 1.85, "learning_rate": 0.00019765956742352062, "loss": 2.8761, "step": 570 }, { "epoch": 1.86, "learning_rate": 0.00019761606436278362, "loss": 2.8484, "step": 573 }, { "epoch": 1.87, "learning_rate": 0.00019757216558376013, "loss": 2.8575, "step": 576 }, { "epoch": 1.88, "learning_rate": 0.00019752787126440803, "loss": 2.785, "step": 579 }, { "epoch": 1.89, "learning_rate": 0.0001974831815842887, "loss": 2.8186, "step": 582 }, { "epoch": 1.9, "learning_rate": 0.00019743809672456618, "loss": 2.8184, "step": 585 }, { "epoch": 1.91, "learning_rate": 0.0001973926168680066, "loss": 2.8115, "step": 588 }, { "epoch": 1.92, "learning_rate": 0.00019734674219897718, "loss": 2.8627, "step": 591 }, { "epoch": 1.93, "learning_rate": 0.00019730047290344578, "loss": 2.8416, "step": 594 }, { "epoch": 1.94, "learning_rate": 0.0001972538091689799, "loss": 2.8565, "step": 597 }, { "epoch": 1.95, "learning_rate": 0.00019720675118474614, "loss": 2.8373, "step": 600 }, { "epoch": 1.96, "learning_rate": 0.00019715929914150923, "loss": 2.8271, "step": 603 }, { "epoch": 1.97, "learning_rate": 0.00019711145323163137, "loss": 2.8548, "step": 606 }, { "epoch": 1.98, "learning_rate": 0.00019706321364907142, "loss": 2.8292, "step": 609 }, { "epoch": 1.99, "learning_rate": 0.00019701458058938418, "loss": 2.9203, "step": 612 }, { "epoch": 2.0, "learning_rate": 0.00019696555424971943, "loss": 2.8727, "step": 615 }, { "epoch": 2.0, "eval_accuracy": 0.5661561892883697, "eval_loss": 2.672184705734253, "eval_runtime": 16.2659, "eval_samples_per_second": 135.068, "eval_steps_per_second": 67.565, "step": 616 }, { "epoch": 2.01, "learning_rate": 0.0001969161348288213, "loss": 2.7287, "step": 618 }, { "epoch": 2.02, "learning_rate": 0.00019686632252702743, "loss": 2.7983, "step": 621 }, { "epoch": 2.03, "learning_rate": 0.00019681611754626807, "loss": 2.7829, "step": 624 }, { "epoch": 2.04, "learning_rate": 0.00019676552009006534, "loss": 2.8671, "step": 627 }, { "epoch": 2.05, "learning_rate": 0.0001967145303635324, "loss": 2.7472, "step": 630 }, { "epoch": 2.06, "learning_rate": 0.00019666314857337262, "loss": 2.7506, "step": 633 }, { "epoch": 2.06, "learning_rate": 0.00019661137492787867, "loss": 2.7307, "step": 636 }, { "epoch": 2.07, "learning_rate": 0.00019655920963693174, "loss": 2.7653, "step": 639 }, { "epoch": 2.08, "learning_rate": 0.00019650665291200082, "loss": 2.8072, "step": 642 }, { "epoch": 2.09, "learning_rate": 0.00019645370496614145, "loss": 2.781, "step": 645 }, { "epoch": 2.1, "learning_rate": 0.00019640036601399535, "loss": 2.6695, "step": 648 }, { "epoch": 2.11, "learning_rate": 0.00019634663627178918, "loss": 2.7504, "step": 651 }, { "epoch": 2.12, "learning_rate": 0.00019629251595733383, "loss": 2.7793, "step": 654 }, { "epoch": 2.13, "learning_rate": 0.00019623800529002347, "loss": 2.7255, "step": 657 }, { "epoch": 2.14, "learning_rate": 0.00019618310449083477, "loss": 2.7955, "step": 660 }, { "epoch": 2.15, "learning_rate": 0.00019612781378232583, "loss": 2.6888, "step": 663 }, { "epoch": 2.16, "learning_rate": 0.00019607213338863547, "loss": 2.8287, "step": 666 }, { "epoch": 2.17, "learning_rate": 0.0001960160635354821, "loss": 2.7925, "step": 669 }, { "epoch": 2.18, "learning_rate": 0.00019595960445016307, "loss": 2.8107, "step": 672 }, { "epoch": 2.19, "learning_rate": 0.00019590275636155352, "loss": 2.7144, "step": 675 }, { "epoch": 2.2, "learning_rate": 0.00019584551950010555, "loss": 2.8271, "step": 678 }, { "epoch": 2.21, "learning_rate": 0.00019578789409784727, "loss": 2.685, "step": 681 }, { "epoch": 2.22, "learning_rate": 0.00019572988038838194, "loss": 2.7504, "step": 684 }, { "epoch": 2.23, "learning_rate": 0.00019567147860688686, "loss": 2.7186, "step": 687 }, { "epoch": 2.24, "learning_rate": 0.00019561268899011256, "loss": 2.7287, "step": 690 }, { "epoch": 2.25, "learning_rate": 0.00019555351177638172, "loss": 2.7973, "step": 693 }, { "epoch": 2.26, "learning_rate": 0.00019549394720558833, "loss": 2.7732, "step": 696 }, { "epoch": 2.27, "learning_rate": 0.00019543399551919668, "loss": 2.7949, "step": 699 }, { "epoch": 2.28, "learning_rate": 0.0001953736569602403, "loss": 2.893, "step": 702 }, { "epoch": 2.29, "learning_rate": 0.00019531293177332102, "loss": 2.7169, "step": 705 }, { "epoch": 2.3, "learning_rate": 0.00019525182020460803, "loss": 2.7442, "step": 708 }, { "epoch": 2.31, "learning_rate": 0.0001951903225018369, "loss": 2.7373, "step": 711 }, { "epoch": 2.32, "learning_rate": 0.0001951284389143084, "loss": 2.6366, "step": 714 }, { "epoch": 2.33, "learning_rate": 0.00019506616969288768, "loss": 2.7411, "step": 717 }, { "epoch": 2.34, "learning_rate": 0.00019500351509000314, "loss": 2.7378, "step": 720 }, { "epoch": 2.35, "learning_rate": 0.00019494047535964553, "loss": 2.6151, "step": 723 }, { "epoch": 2.36, "learning_rate": 0.00019487705075736672, "loss": 2.8224, "step": 726 }, { "epoch": 2.37, "learning_rate": 0.00019481324154027894, "loss": 2.8167, "step": 729 }, { "epoch": 2.38, "learning_rate": 0.00019474904796705337, "loss": 2.6986, "step": 732 }, { "epoch": 2.39, "learning_rate": 0.0001946844702979195, "loss": 2.7249, "step": 735 }, { "epoch": 2.4, "learning_rate": 0.00019461950879466383, "loss": 2.6904, "step": 738 }, { "epoch": 2.41, "learning_rate": 0.0001945541637206287, "loss": 2.7191, "step": 741 }, { "epoch": 2.42, "learning_rate": 0.00019448843534071163, "loss": 2.804, "step": 744 }, { "epoch": 2.43, "learning_rate": 0.00019442232392136375, "loss": 2.7587, "step": 747 }, { "epoch": 2.44, "learning_rate": 0.00019435582973058915, "loss": 2.6742, "step": 750 }, { "epoch": 2.44, "learning_rate": 0.00019428895303794352, "loss": 2.7017, "step": 753 }, { "epoch": 2.45, "learning_rate": 0.00019422169411453317, "loss": 2.7544, "step": 756 }, { "epoch": 2.46, "learning_rate": 0.0001941540532330139, "loss": 2.7186, "step": 759 }, { "epoch": 2.47, "learning_rate": 0.00019408603066758988, "loss": 2.7649, "step": 762 }, { "epoch": 2.48, "learning_rate": 0.00019401762669401257, "loss": 2.8109, "step": 765 }, { "epoch": 2.49, "learning_rate": 0.00019394884158957965, "loss": 2.7248, "step": 768 }, { "epoch": 2.5, "learning_rate": 0.00019387967563313377, "loss": 2.6719, "step": 771 }, { "epoch": 2.51, "learning_rate": 0.00019381012910506146, "loss": 2.7268, "step": 774 }, { "epoch": 2.52, "learning_rate": 0.00019374020228729206, "loss": 2.7121, "step": 777 }, { "epoch": 2.53, "learning_rate": 0.0001936698954632966, "loss": 2.6516, "step": 780 }, { "epoch": 2.54, "learning_rate": 0.00019359920891808647, "loss": 2.7795, "step": 783 }, { "epoch": 2.55, "learning_rate": 0.00019352814293821248, "loss": 2.7295, "step": 786 }, { "epoch": 2.56, "learning_rate": 0.00019345669781176356, "loss": 2.6901, "step": 789 }, { "epoch": 2.57, "learning_rate": 0.00019338487382836565, "loss": 2.7171, "step": 792 }, { "epoch": 2.58, "learning_rate": 0.00019331267127918044, "loss": 2.5934, "step": 795 }, { "epoch": 2.59, "learning_rate": 0.00019324009045690438, "loss": 2.677, "step": 798 }, { "epoch": 2.6, "learning_rate": 0.00019316713165576726, "loss": 2.7009, "step": 801 }, { "epoch": 2.61, "learning_rate": 0.0001930937951715312, "loss": 2.7384, "step": 804 }, { "epoch": 2.62, "learning_rate": 0.00019302008130148932, "loss": 2.6525, "step": 807 }, { "epoch": 2.63, "learning_rate": 0.00019294599034446467, "loss": 2.626, "step": 810 }, { "epoch": 2.64, "learning_rate": 0.00019287152260080888, "loss": 2.6826, "step": 813 }, { "epoch": 2.65, "learning_rate": 0.00019279667837240105, "loss": 2.7388, "step": 816 }, { "epoch": 2.66, "learning_rate": 0.00019272145796264648, "loss": 2.7281, "step": 819 }, { "epoch": 2.67, "learning_rate": 0.0001926458616764754, "loss": 2.5821, "step": 822 }, { "epoch": 2.68, "learning_rate": 0.00019256988982034178, "loss": 2.6877, "step": 825 }, { "epoch": 2.69, "learning_rate": 0.00019249354270222218, "loss": 2.6533, "step": 828 }, { "epoch": 2.7, "learning_rate": 0.00019241682063161428, "loss": 2.7017, "step": 831 }, { "epoch": 2.71, "learning_rate": 0.00019233972391953584, "loss": 2.5812, "step": 834 }, { "epoch": 2.72, "learning_rate": 0.00019226225287852325, "loss": 2.6331, "step": 837 }, { "epoch": 2.73, "learning_rate": 0.0001921844078226305, "loss": 2.6415, "step": 840 }, { "epoch": 2.74, "learning_rate": 0.0001921061890674277, "loss": 2.6382, "step": 843 }, { "epoch": 2.75, "learning_rate": 0.0001920275969299998, "loss": 2.6692, "step": 846 }, { "epoch": 2.76, "learning_rate": 0.00019194863172894552, "loss": 2.6522, "step": 849 }, { "epoch": 2.77, "learning_rate": 0.00019186929378437582, "loss": 2.6311, "step": 852 }, { "epoch": 2.78, "learning_rate": 0.00019178958341791268, "loss": 2.6869, "step": 855 }, { "epoch": 2.79, "learning_rate": 0.00019170950095268792, "loss": 2.6057, "step": 858 }, { "epoch": 2.8, "learning_rate": 0.00019162904671334163, "loss": 2.6813, "step": 861 }, { "epoch": 2.81, "learning_rate": 0.00019154822102602115, "loss": 2.6263, "step": 864 }, { "epoch": 2.81, "learning_rate": 0.0001914670242183795, "loss": 2.6506, "step": 867 }, { "epoch": 2.82, "learning_rate": 0.00019138545661957426, "loss": 2.6605, "step": 870 }, { "epoch": 2.83, "learning_rate": 0.00019130351856026597, "loss": 2.6763, "step": 873 }, { "epoch": 2.84, "learning_rate": 0.00019122121037261719, "loss": 2.6219, "step": 876 }, { "epoch": 2.85, "learning_rate": 0.00019113853239029064, "loss": 2.6208, "step": 879 }, { "epoch": 2.86, "learning_rate": 0.00019105548494844835, "loss": 2.7369, "step": 882 }, { "epoch": 2.87, "learning_rate": 0.00019097206838374997, "loss": 2.5989, "step": 885 }, { "epoch": 2.88, "learning_rate": 0.0001908882830343515, "loss": 2.5859, "step": 888 }, { "epoch": 2.89, "learning_rate": 0.00019080412923990395, "loss": 2.6183, "step": 891 }, { "epoch": 2.9, "learning_rate": 0.00019071960734155194, "loss": 2.6804, "step": 894 }, { "epoch": 2.91, "learning_rate": 0.00019063471768193235, "loss": 2.6772, "step": 897 }, { "epoch": 2.92, "learning_rate": 0.00019054946060517283, "loss": 2.6334, "step": 900 }, { "epoch": 2.93, "learning_rate": 0.00019046383645689055, "loss": 2.7288, "step": 903 }, { "epoch": 2.94, "learning_rate": 0.00019037784558419065, "loss": 2.6486, "step": 906 }, { "epoch": 2.95, "learning_rate": 0.00019029148833566497, "loss": 2.6382, "step": 909 }, { "epoch": 2.96, "learning_rate": 0.00019020476506139057, "loss": 2.5683, "step": 912 }, { "epoch": 2.97, "learning_rate": 0.00019011767611292819, "loss": 2.6047, "step": 915 }, { "epoch": 2.98, "learning_rate": 0.00019003022184332116, "loss": 2.6616, "step": 918 }, { "epoch": 2.99, "learning_rate": 0.0001899424026070936, "loss": 2.6049, "step": 921 }, { "epoch": 3.0, "learning_rate": 0.00018985421876024916, "loss": 2.6339, "step": 924 }, { "epoch": 3.0, "eval_accuracy": 0.5878054172915932, "eval_loss": 2.479712724685669, "eval_runtime": 16.3394, "eval_samples_per_second": 134.46, "eval_steps_per_second": 67.261, "step": 924 }, { "epoch": 3.01, "learning_rate": 0.0001897656706602696, "loss": 2.5743, "step": 927 }, { "epoch": 3.02, "learning_rate": 0.0001896767586661133, "loss": 2.5385, "step": 930 }, { "epoch": 3.03, "learning_rate": 0.0001895874831382138, "loss": 2.6556, "step": 933 }, { "epoch": 3.04, "learning_rate": 0.00018949784443847824, "loss": 2.5895, "step": 936 }, { "epoch": 3.05, "learning_rate": 0.00018940784293028617, "loss": 2.5747, "step": 939 }, { "epoch": 3.06, "learning_rate": 0.00018931747897848778, "loss": 2.6032, "step": 942 }, { "epoch": 3.07, "learning_rate": 0.00018922675294940256, "loss": 2.5687, "step": 945 }, { "epoch": 3.08, "learning_rate": 0.00018913566521081777, "loss": 2.5473, "step": 948 }, { "epoch": 3.09, "learning_rate": 0.00018904421613198712, "loss": 2.6586, "step": 951 }, { "epoch": 3.1, "learning_rate": 0.00018895240608362895, "loss": 2.6245, "step": 954 }, { "epoch": 3.11, "learning_rate": 0.000188860235437925, "loss": 2.6062, "step": 957 }, { "epoch": 3.12, "learning_rate": 0.00018876770456851877, "loss": 2.4521, "step": 960 }, { "epoch": 3.13, "learning_rate": 0.0001886748138505141, "loss": 2.698, "step": 963 }, { "epoch": 3.14, "learning_rate": 0.00018858156366047358, "loss": 2.5416, "step": 966 }, { "epoch": 3.15, "learning_rate": 0.00018848795437641697, "loss": 2.6364, "step": 969 }, { "epoch": 3.16, "learning_rate": 0.00018839398637781972, "loss": 2.5949, "step": 972 }, { "epoch": 3.17, "learning_rate": 0.00018829966004561163, "loss": 2.5397, "step": 975 }, { "epoch": 3.18, "learning_rate": 0.00018820497576217492, "loss": 2.5792, "step": 978 }, { "epoch": 3.19, "learning_rate": 0.00018810993391134295, "loss": 2.5549, "step": 981 }, { "epoch": 3.19, "learning_rate": 0.00018801453487839862, "loss": 2.6141, "step": 984 }, { "epoch": 3.2, "learning_rate": 0.00018791877905007277, "loss": 2.6055, "step": 987 }, { "epoch": 3.21, "learning_rate": 0.00018782266681454255, "loss": 2.5834, "step": 990 }, { "epoch": 3.22, "learning_rate": 0.00018772619856143009, "loss": 2.6272, "step": 993 }, { "epoch": 3.23, "learning_rate": 0.0001876293746818006, "loss": 2.5862, "step": 996 }, { "epoch": 3.24, "learning_rate": 0.000187532195568161, "loss": 2.6453, "step": 999 }, { "epoch": 3.25, "learning_rate": 0.00018743466161445823, "loss": 2.5199, "step": 1002 }, { "epoch": 3.26, "learning_rate": 0.00018733677321607775, "loss": 2.5887, "step": 1005 }, { "epoch": 3.27, "learning_rate": 0.0001872385307698418, "loss": 2.5769, "step": 1008 }, { "epoch": 3.28, "learning_rate": 0.00018713993467400796, "loss": 2.6303, "step": 1011 }, { "epoch": 3.29, "learning_rate": 0.00018704098532826735, "loss": 2.5144, "step": 1014 }, { "epoch": 3.3, "learning_rate": 0.0001869416831337432, "loss": 2.5568, "step": 1017 }, { "epoch": 3.31, "learning_rate": 0.00018684202849298897, "loss": 2.6413, "step": 1020 }, { "epoch": 3.32, "learning_rate": 0.00018674202180998708, "loss": 2.5877, "step": 1023 }, { "epoch": 3.33, "learning_rate": 0.0001866416634901469, "loss": 2.5414, "step": 1026 }, { "epoch": 3.34, "learning_rate": 0.00018654095394030334, "loss": 2.5394, "step": 1029 }, { "epoch": 3.35, "learning_rate": 0.00018643989356871514, "loss": 2.5929, "step": 1032 }, { "epoch": 3.36, "learning_rate": 0.00018633848278506323, "loss": 2.6068, "step": 1035 }, { "epoch": 3.37, "learning_rate": 0.00018623672200044898, "loss": 2.6195, "step": 1038 }, { "epoch": 3.38, "learning_rate": 0.00018613461162739263, "loss": 2.5121, "step": 1041 }, { "epoch": 3.39, "learning_rate": 0.00018603215207983165, "loss": 2.5959, "step": 1044 }, { "epoch": 3.4, "learning_rate": 0.0001859293437731189, "loss": 2.5925, "step": 1047 }, { "epoch": 3.41, "learning_rate": 0.00018582618712402113, "loss": 2.608, "step": 1050 }, { "epoch": 3.42, "learning_rate": 0.00018572268255071718, "loss": 2.6188, "step": 1053 }, { "epoch": 3.43, "learning_rate": 0.0001856188304727963, "loss": 2.5683, "step": 1056 }, { "epoch": 3.44, "learning_rate": 0.00018551463131125649, "loss": 2.5835, "step": 1059 }, { "epoch": 3.45, "learning_rate": 0.00018541008548850273, "loss": 2.5374, "step": 1062 }, { "epoch": 3.46, "learning_rate": 0.0001853051934283453, "loss": 2.5489, "step": 1065 }, { "epoch": 3.47, "learning_rate": 0.00018519995555599817, "loss": 2.4947, "step": 1068 }, { "epoch": 3.48, "learning_rate": 0.000185094372298077, "loss": 2.4604, "step": 1071 }, { "epoch": 3.49, "learning_rate": 0.00018498844408259773, "loss": 2.5453, "step": 1074 }, { "epoch": 3.5, "learning_rate": 0.00018488217133897462, "loss": 2.5738, "step": 1077 }, { "epoch": 3.51, "learning_rate": 0.00018477555449801863, "loss": 2.5437, "step": 1080 }, { "epoch": 3.52, "learning_rate": 0.00018466859399193555, "loss": 2.46, "step": 1083 }, { "epoch": 3.53, "learning_rate": 0.00018456129025432442, "loss": 2.5457, "step": 1086 }, { "epoch": 3.54, "learning_rate": 0.00018445364372017564, "loss": 2.5188, "step": 1089 }, { "epoch": 3.55, "learning_rate": 0.00018434565482586924, "loss": 2.5652, "step": 1092 }, { "epoch": 3.56, "learning_rate": 0.00018423732400917316, "loss": 2.544, "step": 1095 }, { "epoch": 3.56, "learning_rate": 0.00018412865170924135, "loss": 2.6398, "step": 1098 }, { "epoch": 3.57, "learning_rate": 0.00018401963836661218, "loss": 2.6341, "step": 1101 }, { "epoch": 3.58, "learning_rate": 0.00018391028442320644, "loss": 2.5351, "step": 1104 }, { "epoch": 3.59, "learning_rate": 0.0001838005903223257, "loss": 2.5473, "step": 1107 }, { "epoch": 3.6, "learning_rate": 0.00018369055650865052, "loss": 2.5146, "step": 1110 }, { "epoch": 3.61, "learning_rate": 0.00018358018342823855, "loss": 2.5715, "step": 1113 }, { "epoch": 3.62, "learning_rate": 0.0001834694715285227, "loss": 2.5376, "step": 1116 }, { "epoch": 3.63, "learning_rate": 0.00018335842125830954, "loss": 2.6296, "step": 1119 }, { "epoch": 3.64, "learning_rate": 0.00018324703306777718, "loss": 2.4321, "step": 1122 }, { "epoch": 3.65, "learning_rate": 0.00018313530740847375, "loss": 2.5319, "step": 1125 }, { "epoch": 3.66, "learning_rate": 0.0001830232447333153, "loss": 2.5231, "step": 1128 }, { "epoch": 3.67, "learning_rate": 0.00018291084549658412, "loss": 2.5694, "step": 1131 }, { "epoch": 3.68, "learning_rate": 0.00018279811015392685, "loss": 2.499, "step": 1134 }, { "epoch": 3.69, "learning_rate": 0.00018268503916235273, "loss": 2.5255, "step": 1137 }, { "epoch": 3.7, "learning_rate": 0.00018257163298023151, "loss": 2.5671, "step": 1140 }, { "epoch": 3.71, "learning_rate": 0.0001824578920672919, "loss": 2.4801, "step": 1143 }, { "epoch": 3.72, "learning_rate": 0.00018234381688461942, "loss": 2.5006, "step": 1146 }, { "epoch": 3.73, "learning_rate": 0.00018222940789465475, "loss": 2.5033, "step": 1149 }, { "epoch": 3.74, "learning_rate": 0.00018211466556119173, "loss": 2.565, "step": 1152 }, { "epoch": 3.75, "learning_rate": 0.0001819995903493755, "loss": 2.5869, "step": 1155 }, { "epoch": 3.76, "learning_rate": 0.00018188418272570061, "loss": 2.422, "step": 1158 }, { "epoch": 3.77, "learning_rate": 0.00018176844315800924, "loss": 2.4295, "step": 1161 }, { "epoch": 3.78, "learning_rate": 0.0001816523721154892, "loss": 2.4252, "step": 1164 }, { "epoch": 3.79, "learning_rate": 0.00018153597006867188, "loss": 2.5694, "step": 1167 }, { "epoch": 3.8, "learning_rate": 0.00018141923748943073, "loss": 2.4952, "step": 1170 }, { "epoch": 3.81, "learning_rate": 0.00018130217485097893, "loss": 2.4748, "step": 1173 }, { "epoch": 3.82, "learning_rate": 0.00018118478262786782, "loss": 2.5343, "step": 1176 }, { "epoch": 3.83, "learning_rate": 0.0001810670612959847, "loss": 2.4971, "step": 1179 }, { "epoch": 3.84, "learning_rate": 0.00018094901133255105, "loss": 2.4903, "step": 1182 }, { "epoch": 3.85, "learning_rate": 0.00018083063321612056, "loss": 2.5106, "step": 1185 }, { "epoch": 3.86, "learning_rate": 0.0001807119274265773, "loss": 2.4929, "step": 1188 }, { "epoch": 3.87, "learning_rate": 0.00018059289444513347, "loss": 2.5104, "step": 1191 }, { "epoch": 3.88, "learning_rate": 0.00018047353475432782, "loss": 2.4528, "step": 1194 }, { "epoch": 3.89, "learning_rate": 0.00018035384883802346, "loss": 2.4571, "step": 1197 }, { "epoch": 3.9, "learning_rate": 0.00018023383718140593, "loss": 2.5601, "step": 1200 }, { "epoch": 3.91, "learning_rate": 0.00018011350027098127, "loss": 2.4553, "step": 1203 }, { "epoch": 3.92, "learning_rate": 0.00017999283859457412, "loss": 2.5075, "step": 1206 }, { "epoch": 3.93, "learning_rate": 0.0001798718526413256, "loss": 2.5526, "step": 1209 }, { "epoch": 3.94, "learning_rate": 0.00017975054290169138, "loss": 2.4881, "step": 1212 }, { "epoch": 3.94, "learning_rate": 0.0001796289098674397, "loss": 2.4973, "step": 1215 }, { "epoch": 3.95, "learning_rate": 0.00017950695403164943, "loss": 2.4831, "step": 1218 }, { "epoch": 3.96, "learning_rate": 0.000179384675888708, "loss": 2.5187, "step": 1221 }, { "epoch": 3.97, "learning_rate": 0.0001792620759343094, "loss": 2.4949, "step": 1224 }, { "epoch": 3.98, "learning_rate": 0.00017913915466545217, "loss": 2.533, "step": 1227 }, { "epoch": 3.99, "learning_rate": 0.00017901591258043747, "loss": 2.5053, "step": 1230 }, { "epoch": 4.0, "eval_accuracy": 0.6025211491749728, "eval_loss": 2.383329153060913, "eval_runtime": 16.3176, "eval_samples_per_second": 134.64, "eval_steps_per_second": 67.351, "step": 1232 }, { "epoch": 4.0, "learning_rate": 0.0001788923501788669, "loss": 2.5555, "step": 1233 }, { "epoch": 4.01, "learning_rate": 0.00017876846796164068, "loss": 2.4955, "step": 1236 }, { "epoch": 4.02, "learning_rate": 0.0001786442664309554, "loss": 2.5338, "step": 1239 }, { "epoch": 4.03, "learning_rate": 0.0001785197460903021, "loss": 2.4958, "step": 1242 }, { "epoch": 4.04, "learning_rate": 0.0001783949074444643, "loss": 2.4291, "step": 1245 }, { "epoch": 4.05, "learning_rate": 0.00017826975099951583, "loss": 2.4112, "step": 1248 }, { "epoch": 4.06, "learning_rate": 0.0001781442772628188, "loss": 2.4166, "step": 1251 }, { "epoch": 4.07, "learning_rate": 0.00017801848674302154, "loss": 2.5571, "step": 1254 }, { "epoch": 4.08, "learning_rate": 0.00017789237995005668, "loss": 2.4778, "step": 1257 }, { "epoch": 4.09, "learning_rate": 0.0001777659573951388, "loss": 2.5491, "step": 1260 }, { "epoch": 4.1, "learning_rate": 0.00017763921959076273, "loss": 2.4311, "step": 1263 }, { "epoch": 4.11, "learning_rate": 0.00017751216705070105, "loss": 2.4439, "step": 1266 }, { "epoch": 4.12, "learning_rate": 0.00017738480029000234, "loss": 2.4623, "step": 1269 }, { "epoch": 4.13, "learning_rate": 0.000177257119824989, "loss": 2.5291, "step": 1272 }, { "epoch": 4.14, "learning_rate": 0.00017712912617325502, "loss": 2.4934, "step": 1275 }, { "epoch": 4.15, "learning_rate": 0.0001770008198536641, "loss": 2.5712, "step": 1278 }, { "epoch": 4.16, "learning_rate": 0.0001768722013863474, "loss": 2.5426, "step": 1281 }, { "epoch": 4.17, "learning_rate": 0.00017674327129270148, "loss": 2.4619, "step": 1284 }, { "epoch": 4.18, "learning_rate": 0.00017661403009538616, "loss": 2.484, "step": 1287 }, { "epoch": 4.19, "learning_rate": 0.00017648447831832242, "loss": 2.4566, "step": 1290 }, { "epoch": 4.2, "learning_rate": 0.0001763546164866903, "loss": 2.5267, "step": 1293 }, { "epoch": 4.21, "learning_rate": 0.00017622444512692672, "loss": 2.4614, "step": 1296 }, { "epoch": 4.22, "learning_rate": 0.00017609396476672343, "loss": 2.4796, "step": 1299 }, { "epoch": 4.23, "learning_rate": 0.0001759631759350247, "loss": 2.3971, "step": 1302 }, { "epoch": 4.24, "learning_rate": 0.0001758320791620254, "loss": 2.4879, "step": 1305 }, { "epoch": 4.25, "learning_rate": 0.0001757006749791687, "loss": 2.5324, "step": 1308 }, { "epoch": 4.26, "learning_rate": 0.00017556896391914394, "loss": 2.3853, "step": 1311 }, { "epoch": 4.27, "learning_rate": 0.0001754369465158845, "loss": 2.455, "step": 1314 }, { "epoch": 4.28, "learning_rate": 0.0001753046233045656, "loss": 2.4964, "step": 1317 }, { "epoch": 4.29, "learning_rate": 0.0001751719948216022, "loss": 2.4615, "step": 1320 }, { "epoch": 4.3, "learning_rate": 0.00017503906160464672, "loss": 2.4745, "step": 1323 }, { "epoch": 4.31, "learning_rate": 0.00017490582419258697, "loss": 2.4068, "step": 1326 }, { "epoch": 4.31, "learning_rate": 0.00017477228312554388, "loss": 2.4827, "step": 1329 }, { "epoch": 4.32, "learning_rate": 0.00017463843894486937, "loss": 2.4002, "step": 1332 }, { "epoch": 4.33, "learning_rate": 0.00017450429219314408, "loss": 2.3769, "step": 1335 }, { "epoch": 4.34, "learning_rate": 0.00017436984341417532, "loss": 2.4319, "step": 1338 }, { "epoch": 4.35, "learning_rate": 0.00017423509315299458, "loss": 2.4803, "step": 1341 }, { "epoch": 4.36, "learning_rate": 0.00017410004195585573, "loss": 2.4286, "step": 1344 }, { "epoch": 4.37, "learning_rate": 0.00017396469037023242, "loss": 2.4884, "step": 1347 }, { "epoch": 4.38, "learning_rate": 0.00017382903894481611, "loss": 2.4376, "step": 1350 }, { "epoch": 4.39, "learning_rate": 0.00017369308822951367, "loss": 2.4966, "step": 1353 }, { "epoch": 4.4, "learning_rate": 0.00017355683877544532, "loss": 2.5422, "step": 1356 }, { "epoch": 4.41, "learning_rate": 0.00017342029113494233, "loss": 2.4287, "step": 1359 }, { "epoch": 4.42, "learning_rate": 0.00017328344586154467, "loss": 2.445, "step": 1362 }, { "epoch": 4.43, "learning_rate": 0.0001731463035099989, "loss": 2.4555, "step": 1365 }, { "epoch": 4.44, "learning_rate": 0.00017300886463625595, "loss": 2.4885, "step": 1368 }, { "epoch": 4.45, "learning_rate": 0.00017287112979746868, "loss": 2.4137, "step": 1371 }, { "epoch": 4.46, "learning_rate": 0.00017273309955198984, "loss": 2.494, "step": 1374 }, { "epoch": 4.47, "learning_rate": 0.0001725947744593697, "loss": 2.4098, "step": 1377 }, { "epoch": 4.48, "learning_rate": 0.0001724561550803537, "loss": 2.4887, "step": 1380 }, { "epoch": 4.49, "learning_rate": 0.00017231724197688033, "loss": 2.4888, "step": 1383 }, { "epoch": 4.5, "learning_rate": 0.0001721780357120788, "loss": 2.5013, "step": 1386 }, { "epoch": 4.51, "learning_rate": 0.00017203853685026675, "loss": 2.422, "step": 1389 }, { "epoch": 4.52, "learning_rate": 0.00017189874595694788, "loss": 2.4682, "step": 1392 }, { "epoch": 4.53, "learning_rate": 0.00017175866359880982, "loss": 2.3751, "step": 1395 }, { "epoch": 4.54, "learning_rate": 0.00017161829034372168, "loss": 2.4773, "step": 1398 }, { "epoch": 4.55, "learning_rate": 0.00017147762676073187, "loss": 2.4837, "step": 1401 }, { "epoch": 4.56, "learning_rate": 0.0001713366734200657, "loss": 2.4487, "step": 1404 }, { "epoch": 4.57, "learning_rate": 0.00017119543089312317, "loss": 2.4272, "step": 1407 }, { "epoch": 4.58, "learning_rate": 0.00017105389975247647, "loss": 2.4699, "step": 1410 }, { "epoch": 4.59, "learning_rate": 0.00017091208057186792, "loss": 2.4837, "step": 1413 }, { "epoch": 4.6, "learning_rate": 0.00017076997392620737, "loss": 2.4351, "step": 1416 }, { "epoch": 4.61, "learning_rate": 0.0001706275803915701, "loss": 2.3902, "step": 1419 }, { "epoch": 4.62, "learning_rate": 0.00017048490054519434, "loss": 2.3355, "step": 1422 }, { "epoch": 4.63, "learning_rate": 0.00017034193496547902, "loss": 2.3968, "step": 1425 }, { "epoch": 4.64, "learning_rate": 0.00017019868423198134, "loss": 2.4197, "step": 1428 }, { "epoch": 4.65, "learning_rate": 0.00017005514892541444, "loss": 2.4192, "step": 1431 }, { "epoch": 4.66, "learning_rate": 0.00016991132962764516, "loss": 2.359, "step": 1434 }, { "epoch": 4.67, "learning_rate": 0.00016976722692169148, "loss": 2.4068, "step": 1437 }, { "epoch": 4.68, "learning_rate": 0.00016962284139172037, "loss": 2.4527, "step": 1440 }, { "epoch": 4.69, "learning_rate": 0.00016947817362304525, "loss": 2.4723, "step": 1443 }, { "epoch": 4.69, "learning_rate": 0.00016933322420212372, "loss": 2.4029, "step": 1446 }, { "epoch": 4.7, "learning_rate": 0.00016918799371655512, "loss": 2.4574, "step": 1449 }, { "epoch": 4.71, "learning_rate": 0.00016904248275507818, "loss": 2.4426, "step": 1452 }, { "epoch": 4.72, "learning_rate": 0.00016889669190756868, "loss": 2.4689, "step": 1455 }, { "epoch": 4.73, "learning_rate": 0.00016875062176503693, "loss": 2.407, "step": 1458 }, { "epoch": 4.74, "learning_rate": 0.0001686042729196255, "loss": 2.3877, "step": 1461 }, { "epoch": 4.75, "learning_rate": 0.0001684576459646068, "loss": 2.394, "step": 1464 }, { "epoch": 4.76, "learning_rate": 0.00016831074149438056, "loss": 2.4613, "step": 1467 }, { "epoch": 4.77, "learning_rate": 0.00016816356010447163, "loss": 2.3824, "step": 1470 }, { "epoch": 4.78, "learning_rate": 0.0001680161023915273, "loss": 2.5161, "step": 1473 }, { "epoch": 4.79, "learning_rate": 0.00016786836895331514, "loss": 2.3828, "step": 1476 }, { "epoch": 4.8, "learning_rate": 0.00016772036038872039, "loss": 2.4413, "step": 1479 }, { "epoch": 4.81, "learning_rate": 0.0001675720772977437, "loss": 2.3981, "step": 1482 }, { "epoch": 4.82, "learning_rate": 0.00016742352028149843, "loss": 2.4631, "step": 1485 }, { "epoch": 4.83, "learning_rate": 0.0001672746899422086, "loss": 2.4012, "step": 1488 }, { "epoch": 4.84, "learning_rate": 0.000167125586883206, "loss": 2.4043, "step": 1491 }, { "epoch": 4.85, "learning_rate": 0.00016697621170892824, "loss": 2.3889, "step": 1494 }, { "epoch": 4.86, "learning_rate": 0.00016682656502491576, "loss": 2.4473, "step": 1497 }, { "epoch": 4.87, "learning_rate": 0.0001666766474378099, "loss": 2.5165, "step": 1500 }, { "epoch": 4.88, "learning_rate": 0.00016652645955535006, "loss": 2.5155, "step": 1503 }, { "epoch": 4.89, "learning_rate": 0.00016637600198637133, "loss": 2.3923, "step": 1506 }, { "epoch": 4.9, "learning_rate": 0.00016622527534080218, "loss": 2.4353, "step": 1509 }, { "epoch": 4.91, "learning_rate": 0.00016607428022966182, "loss": 2.3346, "step": 1512 }, { "epoch": 4.92, "learning_rate": 0.00016592301726505771, "loss": 2.3873, "step": 1515 }, { "epoch": 4.93, "learning_rate": 0.00016577148706018328, "loss": 2.4799, "step": 1518 }, { "epoch": 4.94, "learning_rate": 0.00016561969022931515, "loss": 2.4228, "step": 1521 }, { "epoch": 4.95, "learning_rate": 0.0001654676273878109, "loss": 2.4024, "step": 1524 }, { "epoch": 4.96, "learning_rate": 0.00016531529915210643, "loss": 2.386, "step": 1527 }, { "epoch": 4.97, "learning_rate": 0.0001651627061397135, "loss": 2.4365, "step": 1530 }, { "epoch": 4.98, "learning_rate": 0.00016500984896921725, "loss": 2.3997, "step": 1533 }, { "epoch": 4.99, "learning_rate": 0.00016485672826027363, "loss": 2.3659, "step": 1536 }, { "epoch": 5.0, "learning_rate": 0.00016470334463360698, "loss": 2.4531, "step": 1539 }, { "epoch": 5.0, "eval_accuracy": 0.6106430794745761, "eval_loss": 2.3084843158721924, "eval_runtime": 16.3383, "eval_samples_per_second": 134.469, "eval_steps_per_second": 67.265, "step": 1540 }, { "epoch": 5.01, "learning_rate": 0.00016454969871100743, "loss": 2.376, "step": 1542 }, { "epoch": 5.02, "learning_rate": 0.0001643957911153284, "loss": 2.2957, "step": 1545 }, { "epoch": 5.03, "learning_rate": 0.00016424162247048412, "loss": 2.3557, "step": 1548 }, { "epoch": 5.04, "learning_rate": 0.00016408719340144705, "loss": 2.3722, "step": 1551 }, { "epoch": 5.05, "learning_rate": 0.00016393250453424534, "loss": 2.3903, "step": 1554 }, { "epoch": 5.06, "learning_rate": 0.0001637775564959604, "loss": 2.3326, "step": 1557 }, { "epoch": 5.06, "learning_rate": 0.00016362234991472416, "loss": 2.3791, "step": 1560 }, { "epoch": 5.07, "learning_rate": 0.00016346688541971668, "loss": 2.4608, "step": 1563 }, { "epoch": 5.08, "learning_rate": 0.00016331116364116363, "loss": 2.3049, "step": 1566 }, { "epoch": 5.09, "learning_rate": 0.00016315518521033354, "loss": 2.437, "step": 1569 }, { "epoch": 5.1, "learning_rate": 0.00016299895075953547, "loss": 2.4088, "step": 1572 }, { "epoch": 5.11, "learning_rate": 0.0001628424609221163, "loss": 2.4097, "step": 1575 }, { "epoch": 5.12, "learning_rate": 0.00016268571633245812, "loss": 2.3635, "step": 1578 }, { "epoch": 5.13, "learning_rate": 0.00016252871762597592, "loss": 2.4373, "step": 1581 }, { "epoch": 5.14, "learning_rate": 0.00016237146543911463, "loss": 2.2713, "step": 1584 }, { "epoch": 5.15, "learning_rate": 0.00016221396040934694, "loss": 2.4049, "step": 1587 }, { "epoch": 5.16, "learning_rate": 0.00016205620317517034, "loss": 2.3796, "step": 1590 }, { "epoch": 5.17, "learning_rate": 0.00016189819437610484, "loss": 2.3642, "step": 1593 }, { "epoch": 5.18, "learning_rate": 0.00016173993465269022, "loss": 2.3668, "step": 1596 }, { "epoch": 5.19, "learning_rate": 0.00016158142464648342, "loss": 2.4196, "step": 1599 }, { "epoch": 5.2, "learning_rate": 0.00016142266500005604, "loss": 2.488, "step": 1602 }, { "epoch": 5.21, "learning_rate": 0.00016126365635699166, "loss": 2.3974, "step": 1605 }, { "epoch": 5.22, "learning_rate": 0.00016110439936188318, "loss": 2.4516, "step": 1608 }, { "epoch": 5.23, "learning_rate": 0.00016094489466033043, "loss": 2.3589, "step": 1611 }, { "epoch": 5.24, "learning_rate": 0.0001607851428989372, "loss": 2.4077, "step": 1614 }, { "epoch": 5.25, "learning_rate": 0.00016062514472530898, "loss": 2.3902, "step": 1617 }, { "epoch": 5.26, "learning_rate": 0.0001604649007880501, "loss": 2.3319, "step": 1620 }, { "epoch": 5.27, "learning_rate": 0.00016030441173676117, "loss": 2.3729, "step": 1623 }, { "epoch": 5.28, "learning_rate": 0.00016014367822203646, "loss": 2.3052, "step": 1626 }, { "epoch": 5.29, "learning_rate": 0.0001599827008954613, "loss": 2.3613, "step": 1629 }, { "epoch": 5.3, "learning_rate": 0.0001598214804096093, "loss": 2.4415, "step": 1632 }, { "epoch": 5.31, "learning_rate": 0.00015966001741803983, "loss": 2.3959, "step": 1635 }, { "epoch": 5.32, "learning_rate": 0.0001594983125752954, "loss": 2.3294, "step": 1638 }, { "epoch": 5.33, "learning_rate": 0.0001593363665368988, "loss": 2.3211, "step": 1641 }, { "epoch": 5.34, "learning_rate": 0.00015917417995935077, "loss": 2.3113, "step": 1644 }, { "epoch": 5.35, "learning_rate": 0.00015901175350012698, "loss": 2.3507, "step": 1647 }, { "epoch": 5.36, "learning_rate": 0.00015884908781767565, "loss": 2.3533, "step": 1650 }, { "epoch": 5.37, "learning_rate": 0.00015868618357141472, "loss": 2.4636, "step": 1653 }, { "epoch": 5.38, "learning_rate": 0.00015852304142172923, "loss": 2.4222, "step": 1656 }, { "epoch": 5.39, "learning_rate": 0.00015835966202996867, "loss": 2.4257, "step": 1659 }, { "epoch": 5.4, "learning_rate": 0.00015819604605844418, "loss": 2.3802, "step": 1662 }, { "epoch": 5.41, "learning_rate": 0.00015803219417042608, "loss": 2.2824, "step": 1665 }, { "epoch": 5.42, "learning_rate": 0.00015786810703014096, "loss": 2.4089, "step": 1668 }, { "epoch": 5.43, "learning_rate": 0.0001577037853027691, "loss": 2.3537, "step": 1671 }, { "epoch": 5.44, "learning_rate": 0.00015753922965444184, "loss": 2.3758, "step": 1674 }, { "epoch": 5.44, "learning_rate": 0.0001573744407522386, "loss": 2.3749, "step": 1677 }, { "epoch": 5.45, "learning_rate": 0.00015720941926418455, "loss": 2.3841, "step": 1680 }, { "epoch": 5.46, "learning_rate": 0.0001570441658592477, "loss": 2.3546, "step": 1683 }, { "epoch": 5.47, "learning_rate": 0.00015687868120733614, "loss": 2.3845, "step": 1686 }, { "epoch": 5.48, "learning_rate": 0.00015671296597929535, "loss": 2.2959, "step": 1689 }, { "epoch": 5.49, "learning_rate": 0.00015654702084690568, "loss": 2.4619, "step": 1692 }, { "epoch": 5.5, "learning_rate": 0.0001563808464828794, "loss": 2.2943, "step": 1695 }, { "epoch": 5.51, "learning_rate": 0.00015621444356085803, "loss": 2.3027, "step": 1698 }, { "epoch": 5.52, "learning_rate": 0.00015604781275540956, "loss": 2.4349, "step": 1701 }, { "epoch": 5.53, "learning_rate": 0.00015588095474202595, "loss": 2.3143, "step": 1704 }, { "epoch": 5.54, "learning_rate": 0.00015571387019712004, "loss": 2.3555, "step": 1707 }, { "epoch": 5.55, "learning_rate": 0.0001555465597980231, "loss": 2.4337, "step": 1710 }, { "epoch": 5.56, "learning_rate": 0.00015537902422298197, "loss": 2.393, "step": 1713 }, { "epoch": 5.57, "learning_rate": 0.00015521126415115623, "loss": 2.3029, "step": 1716 }, { "epoch": 5.58, "learning_rate": 0.00015504328026261566, "loss": 2.3065, "step": 1719 }, { "epoch": 5.59, "learning_rate": 0.0001548750732383372, "loss": 2.3214, "step": 1722 }, { "epoch": 5.6, "learning_rate": 0.00015470664376020246, "loss": 2.3422, "step": 1725 }, { "epoch": 5.61, "learning_rate": 0.00015453799251099478, "loss": 2.3227, "step": 1728 }, { "epoch": 5.62, "learning_rate": 0.00015436912017439657, "loss": 2.3816, "step": 1731 }, { "epoch": 5.63, "learning_rate": 0.00015420002743498645, "loss": 2.3966, "step": 1734 }, { "epoch": 5.64, "learning_rate": 0.00015403071497823652, "loss": 2.2734, "step": 1737 }, { "epoch": 5.65, "learning_rate": 0.0001538611834905096, "loss": 2.384, "step": 1740 }, { "epoch": 5.66, "learning_rate": 0.00015369143365905635, "loss": 2.3495, "step": 1743 }, { "epoch": 5.67, "learning_rate": 0.00015352146617201266, "loss": 2.3252, "step": 1746 }, { "epoch": 5.68, "learning_rate": 0.00015335128171839671, "loss": 2.349, "step": 1749 }, { "epoch": 5.69, "learning_rate": 0.00015318088098810622, "loss": 2.3845, "step": 1752 }, { "epoch": 5.7, "learning_rate": 0.0001530102646719156, "loss": 2.3248, "step": 1755 }, { "epoch": 5.71, "learning_rate": 0.0001528394334614733, "loss": 2.3738, "step": 1758 }, { "epoch": 5.72, "learning_rate": 0.00015266838804929892, "loss": 2.3512, "step": 1761 }, { "epoch": 5.73, "learning_rate": 0.00015249712912878031, "loss": 2.4223, "step": 1764 }, { "epoch": 5.74, "learning_rate": 0.00015232565739417092, "loss": 2.3593, "step": 1767 }, { "epoch": 5.75, "learning_rate": 0.00015215397354058686, "loss": 2.3934, "step": 1770 }, { "epoch": 5.76, "learning_rate": 0.00015198207826400413, "loss": 2.4059, "step": 1773 }, { "epoch": 5.77, "learning_rate": 0.00015180997226125592, "loss": 2.3081, "step": 1776 }, { "epoch": 5.78, "learning_rate": 0.00015163765623002945, "loss": 2.2689, "step": 1779 }, { "epoch": 5.79, "learning_rate": 0.00015146513086886356, "loss": 2.3314, "step": 1782 }, { "epoch": 5.8, "learning_rate": 0.00015129239687714557, "loss": 2.2807, "step": 1785 }, { "epoch": 5.81, "learning_rate": 0.00015111945495510857, "loss": 2.3481, "step": 1788 }, { "epoch": 5.81, "learning_rate": 0.0001509463058038286, "loss": 2.3724, "step": 1791 }, { "epoch": 5.82, "learning_rate": 0.00015077295012522174, "loss": 2.3322, "step": 1794 }, { "epoch": 5.83, "learning_rate": 0.00015059938862204127, "loss": 2.2553, "step": 1797 }, { "epoch": 5.84, "learning_rate": 0.0001504256219978749, "loss": 2.2632, "step": 1800 }, { "epoch": 5.85, "learning_rate": 0.0001502516509571418, "loss": 2.27, "step": 1803 }, { "epoch": 5.86, "learning_rate": 0.00015007747620508988, "loss": 2.3615, "step": 1806 }, { "epoch": 5.87, "learning_rate": 0.00014990309844779284, "loss": 2.2815, "step": 1809 }, { "epoch": 5.88, "learning_rate": 0.0001497285183921473, "loss": 2.3518, "step": 1812 }, { "epoch": 5.89, "learning_rate": 0.00014955373674586996, "loss": 2.3358, "step": 1815 }, { "epoch": 5.9, "learning_rate": 0.00014937875421749472, "loss": 2.3217, "step": 1818 }, { "epoch": 5.91, "learning_rate": 0.00014920357151636992, "loss": 2.3558, "step": 1821 }, { "epoch": 5.92, "learning_rate": 0.00014902818935265527, "loss": 2.3474, "step": 1824 }, { "epoch": 5.93, "learning_rate": 0.00014885260843731905, "loss": 2.3579, "step": 1827 }, { "epoch": 5.94, "learning_rate": 0.00014867682948213536, "loss": 2.3964, "step": 1830 }, { "epoch": 5.95, "learning_rate": 0.000148500853199681, "loss": 2.3697, "step": 1833 }, { "epoch": 5.96, "learning_rate": 0.00014832468030333265, "loss": 2.3099, "step": 1836 }, { "epoch": 5.97, "learning_rate": 0.00014814831150726428, "loss": 2.3651, "step": 1839 }, { "epoch": 5.98, "learning_rate": 0.00014797174752644382, "loss": 2.304, "step": 1842 }, { "epoch": 5.99, "learning_rate": 0.00014779498907663033, "loss": 2.3598, "step": 1845 }, { "epoch": 6.0, "learning_rate": 0.0001476180368743715, "loss": 2.2852, "step": 1848 }, { "epoch": 6.0, "eval_accuracy": 0.6175154625608633, "eval_loss": 2.245072364807129, "eval_runtime": 16.3034, "eval_samples_per_second": 134.757, "eval_steps_per_second": 67.409, "step": 1848 }, { "epoch": 6.01, "learning_rate": 0.00014744089163700025, "loss": 2.395, "step": 1851 }, { "epoch": 6.02, "learning_rate": 0.0001472635540826321, "loss": 2.3843, "step": 1854 }, { "epoch": 6.03, "learning_rate": 0.00014708602493016218, "loss": 2.3417, "step": 1857 }, { "epoch": 6.04, "learning_rate": 0.0001469083048992623, "loss": 2.2776, "step": 1860 }, { "epoch": 6.05, "learning_rate": 0.00014673039471037807, "loss": 2.2988, "step": 1863 }, { "epoch": 6.06, "learning_rate": 0.000146552295084726, "loss": 2.2561, "step": 1866 }, { "epoch": 6.07, "learning_rate": 0.00014637400674429057, "loss": 2.3886, "step": 1869 }, { "epoch": 6.08, "learning_rate": 0.00014619553041182116, "loss": 2.2859, "step": 1872 }, { "epoch": 6.09, "learning_rate": 0.00014601686681082934, "loss": 2.3711, "step": 1875 }, { "epoch": 6.1, "learning_rate": 0.00014583801666558576, "loss": 2.315, "step": 1878 }, { "epoch": 6.11, "learning_rate": 0.00014565898070111735, "loss": 2.2431, "step": 1881 }, { "epoch": 6.12, "learning_rate": 0.0001454797596432043, "loss": 2.3051, "step": 1884 }, { "epoch": 6.13, "learning_rate": 0.00014530035421837716, "loss": 2.3228, "step": 1887 }, { "epoch": 6.14, "learning_rate": 0.00014512076515391375, "loss": 2.3054, "step": 1890 }, { "epoch": 6.15, "learning_rate": 0.0001449409931778365, "loss": 2.3504, "step": 1893 }, { "epoch": 6.16, "learning_rate": 0.0001447610390189092, "loss": 2.328, "step": 1896 }, { "epoch": 6.17, "learning_rate": 0.00014458090340663428, "loss": 2.268, "step": 1899 }, { "epoch": 6.18, "learning_rate": 0.00014440058707124967, "loss": 2.3306, "step": 1902 }, { "epoch": 6.19, "learning_rate": 0.00014422009074372604, "loss": 2.2928, "step": 1905 }, { "epoch": 6.19, "learning_rate": 0.00014403941515576344, "loss": 2.3729, "step": 1908 }, { "epoch": 6.2, "learning_rate": 0.00014385856103978894, "loss": 2.2654, "step": 1911 }, { "epoch": 6.21, "learning_rate": 0.0001436775291289532, "loss": 2.3404, "step": 1914 }, { "epoch": 6.22, "learning_rate": 0.00014349632015712752, "loss": 2.2935, "step": 1917 }, { "epoch": 6.23, "learning_rate": 0.00014331493485890114, "loss": 2.2743, "step": 1920 }, { "epoch": 6.24, "learning_rate": 0.00014313337396957803, "loss": 2.4234, "step": 1923 }, { "epoch": 6.25, "learning_rate": 0.00014295163822517393, "loss": 2.3393, "step": 1926 }, { "epoch": 6.26, "learning_rate": 0.0001427697283624135, "loss": 2.3336, "step": 1929 }, { "epoch": 6.27, "learning_rate": 0.00014258764511872716, "loss": 2.3729, "step": 1932 }, { "epoch": 6.28, "learning_rate": 0.00014240538923224823, "loss": 2.3284, "step": 1935 }, { "epoch": 6.29, "learning_rate": 0.00014222296144180994, "loss": 2.3265, "step": 1938 }, { "epoch": 6.3, "learning_rate": 0.00014204036248694225, "loss": 2.3156, "step": 1941 }, { "epoch": 6.31, "learning_rate": 0.00014185759310786917, "loss": 2.2792, "step": 1944 }, { "epoch": 6.32, "learning_rate": 0.00014167465404550542, "loss": 2.2745, "step": 1947 }, { "epoch": 6.33, "learning_rate": 0.00014149154604145366, "loss": 2.2669, "step": 1950 }, { "epoch": 6.34, "learning_rate": 0.00014130826983800145, "loss": 2.2678, "step": 1953 }, { "epoch": 6.35, "learning_rate": 0.0001411248261781181, "loss": 2.3385, "step": 1956 }, { "epoch": 6.36, "learning_rate": 0.00014094121580545183, "loss": 2.3564, "step": 1959 }, { "epoch": 6.37, "learning_rate": 0.0001407574394643267, "loss": 2.2749, "step": 1962 }, { "epoch": 6.38, "learning_rate": 0.00014057349789973946, "loss": 2.2941, "step": 1965 }, { "epoch": 6.39, "learning_rate": 0.00014038939185735683, "loss": 2.2932, "step": 1968 }, { "epoch": 6.4, "learning_rate": 0.0001402051220835121, "loss": 2.2875, "step": 1971 }, { "epoch": 6.41, "learning_rate": 0.00014002068932520247, "loss": 2.3496, "step": 1974 }, { "epoch": 6.42, "learning_rate": 0.00013983609433008574, "loss": 2.273, "step": 1977 }, { "epoch": 6.43, "learning_rate": 0.0001396513378464774, "loss": 2.2399, "step": 1980 }, { "epoch": 6.44, "learning_rate": 0.00013946642062334766, "loss": 2.3506, "step": 1983 }, { "epoch": 6.45, "learning_rate": 0.00013928134341031825, "loss": 2.2949, "step": 1986 }, { "epoch": 6.46, "learning_rate": 0.00013909610695765948, "loss": 2.3473, "step": 1989 }, { "epoch": 6.47, "learning_rate": 0.00013891071201628728, "loss": 2.2964, "step": 1992 }, { "epoch": 6.48, "learning_rate": 0.00013872515933776, "loss": 2.3721, "step": 1995 }, { "epoch": 6.49, "learning_rate": 0.00013853944967427535, "loss": 2.3076, "step": 1998 }, { "epoch": 6.5, "learning_rate": 0.00013835358377866763, "loss": 2.3327, "step": 2001 }, { "epoch": 6.51, "learning_rate": 0.00013816756240440424, "loss": 2.2885, "step": 2004 }, { "epoch": 6.52, "learning_rate": 0.00013798138630558303, "loss": 2.3773, "step": 2007 }, { "epoch": 6.53, "learning_rate": 0.00013779505623692909, "loss": 2.3261, "step": 2010 }, { "epoch": 6.54, "learning_rate": 0.00013760857295379154, "loss": 2.3456, "step": 2013 }, { "epoch": 6.55, "learning_rate": 0.00013742193721214064, "loss": 2.422, "step": 2016 }, { "epoch": 6.56, "learning_rate": 0.00013723514976856483, "loss": 2.2936, "step": 2019 }, { "epoch": 6.56, "learning_rate": 0.00013704821138026737, "loss": 2.3485, "step": 2022 }, { "epoch": 6.57, "learning_rate": 0.00013686112280506346, "loss": 2.2716, "step": 2025 }, { "epoch": 6.58, "learning_rate": 0.00013667388480137716, "loss": 2.291, "step": 2028 }, { "epoch": 6.59, "learning_rate": 0.0001364864981282383, "loss": 2.3078, "step": 2031 }, { "epoch": 6.6, "learning_rate": 0.00013629896354527932, "loss": 2.3167, "step": 2034 }, { "epoch": 6.61, "learning_rate": 0.0001361112818127323, "loss": 2.3923, "step": 2037 }, { "epoch": 6.62, "learning_rate": 0.00013592345369142585, "loss": 2.3003, "step": 2040 }, { "epoch": 6.63, "learning_rate": 0.00013573547994278205, "loss": 2.2712, "step": 2043 }, { "epoch": 6.64, "learning_rate": 0.0001355473613288132, "loss": 2.3246, "step": 2046 }, { "epoch": 6.65, "learning_rate": 0.00013535909861211903, "loss": 2.2397, "step": 2049 }, { "epoch": 6.66, "learning_rate": 0.00013517069255588327, "loss": 2.2596, "step": 2052 }, { "epoch": 6.67, "learning_rate": 0.00013498214392387083, "loss": 2.1575, "step": 2055 }, { "epoch": 6.68, "learning_rate": 0.0001347934534804246, "loss": 2.2904, "step": 2058 }, { "epoch": 6.69, "learning_rate": 0.00013460462199046226, "loss": 2.1767, "step": 2061 }, { "epoch": 6.7, "learning_rate": 0.00013441565021947332, "loss": 2.2593, "step": 2064 }, { "epoch": 6.71, "learning_rate": 0.00013422653893351604, "loss": 2.3004, "step": 2067 }, { "epoch": 6.72, "learning_rate": 0.0001340372888992141, "loss": 2.2932, "step": 2070 }, { "epoch": 6.73, "learning_rate": 0.0001338479008837538, "loss": 2.3329, "step": 2073 }, { "epoch": 6.74, "learning_rate": 0.00013365837565488064, "loss": 2.2765, "step": 2076 }, { "epoch": 6.75, "learning_rate": 0.00013346871398089644, "loss": 2.2924, "step": 2079 }, { "epoch": 6.76, "learning_rate": 0.00013327891663065614, "loss": 2.2459, "step": 2082 }, { "epoch": 6.77, "learning_rate": 0.0001330889843735647, "loss": 2.1898, "step": 2085 }, { "epoch": 6.78, "learning_rate": 0.00013289891797957395, "loss": 2.2234, "step": 2088 }, { "epoch": 6.79, "learning_rate": 0.00013270871821917946, "loss": 2.3638, "step": 2091 }, { "epoch": 6.8, "learning_rate": 0.00013251838586341745, "loss": 2.3405, "step": 2094 }, { "epoch": 6.81, "learning_rate": 0.00013232792168386176, "loss": 2.2713, "step": 2097 }, { "epoch": 6.82, "learning_rate": 0.00013213732645262044, "loss": 2.2664, "step": 2100 }, { "epoch": 6.83, "learning_rate": 0.00013194660094233298, "loss": 2.2861, "step": 2103 }, { "epoch": 6.84, "learning_rate": 0.00013175574592616692, "loss": 2.2772, "step": 2106 }, { "epoch": 6.85, "learning_rate": 0.00013156476217781468, "loss": 2.2263, "step": 2109 }, { "epoch": 6.86, "learning_rate": 0.00013137365047149078, "loss": 2.3177, "step": 2112 }, { "epoch": 6.87, "learning_rate": 0.00013118241158192827, "loss": 2.2316, "step": 2115 }, { "epoch": 6.88, "learning_rate": 0.0001309910462843758, "loss": 2.3542, "step": 2118 }, { "epoch": 6.89, "learning_rate": 0.00013079955535459455, "loss": 2.2913, "step": 2121 }, { "epoch": 6.9, "learning_rate": 0.000130607939568855, "loss": 2.2785, "step": 2124 }, { "epoch": 6.91, "learning_rate": 0.00013041619970393352, "loss": 2.2508, "step": 2127 }, { "epoch": 6.92, "learning_rate": 0.0001302243365371098, "loss": 2.2514, "step": 2130 }, { "epoch": 6.93, "learning_rate": 0.00013003235084616324, "loss": 2.2517, "step": 2133 }, { "epoch": 6.94, "learning_rate": 0.00012984024340936983, "loss": 2.2517, "step": 2136 }, { "epoch": 6.94, "learning_rate": 0.00012964801500549931, "loss": 2.2079, "step": 2139 }, { "epoch": 6.95, "learning_rate": 0.00012945566641381159, "loss": 2.3606, "step": 2142 }, { "epoch": 6.96, "learning_rate": 0.00012926319841405394, "loss": 2.3764, "step": 2145 }, { "epoch": 6.97, "learning_rate": 0.00012907061178645763, "loss": 2.2781, "step": 2148 }, { "epoch": 6.98, "learning_rate": 0.00012887790731173486, "loss": 2.2436, "step": 2151 }, { "epoch": 6.99, "learning_rate": 0.0001286850857710755, "loss": 2.228, "step": 2154 }, { "epoch": 7.0, "eval_accuracy": 0.6243810506983248, "eval_loss": 2.193675994873047, "eval_runtime": 16.3306, "eval_samples_per_second": 134.533, "eval_steps_per_second": 67.297, "step": 2156 }, { "epoch": 7.0, "learning_rate": 0.00012849214794614407, "loss": 2.2569, "step": 2157 }, { "epoch": 7.01, "learning_rate": 0.0001282990946190764, "loss": 2.3658, "step": 2160 }, { "epoch": 7.02, "learning_rate": 0.00012810592657247656, "loss": 2.2651, "step": 2163 }, { "epoch": 7.03, "learning_rate": 0.0001279126445894138, "loss": 2.215, "step": 2166 }, { "epoch": 7.04, "learning_rate": 0.00012771924945341906, "loss": 2.2391, "step": 2169 }, { "epoch": 7.05, "learning_rate": 0.00012752574194848211, "loss": 2.2661, "step": 2172 }, { "epoch": 7.06, "learning_rate": 0.00012733212285904818, "loss": 2.1834, "step": 2175 }, { "epoch": 7.07, "learning_rate": 0.0001271383929700149, "loss": 2.2816, "step": 2178 }, { "epoch": 7.08, "learning_rate": 0.00012694455306672895, "loss": 2.2706, "step": 2181 }, { "epoch": 7.09, "learning_rate": 0.00012675060393498318, "loss": 2.2744, "step": 2184 }, { "epoch": 7.1, "learning_rate": 0.00012655654636101304, "loss": 2.2792, "step": 2187 }, { "epoch": 7.11, "learning_rate": 0.00012636238113149367, "loss": 2.3216, "step": 2190 }, { "epoch": 7.12, "learning_rate": 0.00012616810903353666, "loss": 2.2724, "step": 2193 }, { "epoch": 7.13, "learning_rate": 0.00012597373085468678, "loss": 2.2908, "step": 2196 }, { "epoch": 7.14, "learning_rate": 0.00012577924738291877, "loss": 2.3213, "step": 2199 }, { "epoch": 7.15, "learning_rate": 0.0001255846594066344, "loss": 2.2962, "step": 2202 }, { "epoch": 7.16, "learning_rate": 0.00012538996771465887, "loss": 2.2341, "step": 2205 }, { "epoch": 7.17, "learning_rate": 0.00012519517309623793, "loss": 2.3292, "step": 2208 }, { "epoch": 7.18, "learning_rate": 0.0001250002763410346, "loss": 2.2678, "step": 2211 }, { "epoch": 7.19, "learning_rate": 0.0001248052782391259, "loss": 2.2799, "step": 2214 }, { "epoch": 7.2, "learning_rate": 0.00012461017958099966, "loss": 2.2489, "step": 2217 }, { "epoch": 7.21, "learning_rate": 0.00012441498115755146, "loss": 2.3042, "step": 2220 }, { "epoch": 7.22, "learning_rate": 0.00012421968376008115, "loss": 2.1692, "step": 2223 }, { "epoch": 7.23, "learning_rate": 0.00012402428818028994, "loss": 2.3398, "step": 2226 }, { "epoch": 7.24, "learning_rate": 0.000123828795210277, "loss": 2.2435, "step": 2229 }, { "epoch": 7.25, "learning_rate": 0.00012363320564253637, "loss": 2.2838, "step": 2232 }, { "epoch": 7.26, "learning_rate": 0.0001234375202699535, "loss": 2.1099, "step": 2235 }, { "epoch": 7.27, "learning_rate": 0.00012324173988580235, "loss": 2.239, "step": 2238 }, { "epoch": 7.28, "learning_rate": 0.0001230458652837421, "loss": 2.2111, "step": 2241 }, { "epoch": 7.29, "learning_rate": 0.00012284989725781377, "loss": 2.2932, "step": 2244 }, { "epoch": 7.3, "learning_rate": 0.0001226538366024371, "loss": 2.2204, "step": 2247 }, { "epoch": 7.31, "learning_rate": 0.00012245768411240737, "loss": 2.2478, "step": 2250 }, { "epoch": 7.31, "learning_rate": 0.00012226144058289216, "loss": 2.2759, "step": 2253 }, { "epoch": 7.32, "learning_rate": 0.00012206510680942806, "loss": 2.297, "step": 2256 }, { "epoch": 7.33, "learning_rate": 0.00012186868358791756, "loss": 2.2437, "step": 2259 }, { "epoch": 7.34, "learning_rate": 0.00012167217171462566, "loss": 2.2756, "step": 2262 }, { "epoch": 7.35, "learning_rate": 0.00012147557198617678, "loss": 2.2352, "step": 2265 }, { "epoch": 7.36, "learning_rate": 0.00012127888519955157, "loss": 2.2638, "step": 2268 }, { "epoch": 7.37, "learning_rate": 0.0001210821121520835, "loss": 2.1679, "step": 2271 }, { "epoch": 7.38, "learning_rate": 0.0001208852536414557, "loss": 2.2499, "step": 2274 }, { "epoch": 7.39, "learning_rate": 0.00012068831046569789, "loss": 2.2459, "step": 2277 }, { "epoch": 7.4, "learning_rate": 0.00012049128342318288, "loss": 2.1518, "step": 2280 }, { "epoch": 7.41, "learning_rate": 0.00012029417331262349, "loss": 2.246, "step": 2283 }, { "epoch": 7.42, "learning_rate": 0.00012009698093306936, "loss": 2.2471, "step": 2286 }, { "epoch": 7.43, "learning_rate": 0.00011989970708390353, "loss": 2.2332, "step": 2289 }, { "epoch": 7.44, "learning_rate": 0.00011970235256483934, "loss": 2.2554, "step": 2292 }, { "epoch": 7.45, "learning_rate": 0.00011950491817591717, "loss": 2.2454, "step": 2295 }, { "epoch": 7.46, "learning_rate": 0.00011930740471750121, "loss": 2.2378, "step": 2298 }, { "epoch": 7.47, "learning_rate": 0.00011910981299027608, "loss": 2.2978, "step": 2301 }, { "epoch": 7.48, "learning_rate": 0.00011891214379524375, "loss": 2.2042, "step": 2304 }, { "epoch": 7.49, "learning_rate": 0.0001187143979337203, "loss": 2.2105, "step": 2307 }, { "epoch": 7.5, "learning_rate": 0.00011851657620733243, "loss": 2.2343, "step": 2310 }, { "epoch": 7.51, "learning_rate": 0.00011831867941801455, "loss": 2.2197, "step": 2313 }, { "epoch": 7.52, "learning_rate": 0.00011812070836800533, "loss": 2.3094, "step": 2316 }, { "epoch": 7.53, "learning_rate": 0.00011792266385984433, "loss": 2.2106, "step": 2319 }, { "epoch": 7.54, "learning_rate": 0.00011772454669636912, "loss": 2.3657, "step": 2322 }, { "epoch": 7.55, "learning_rate": 0.00011752635768071167, "loss": 2.3096, "step": 2325 }, { "epoch": 7.56, "learning_rate": 0.0001173280976162952, "loss": 2.1856, "step": 2328 }, { "epoch": 7.57, "learning_rate": 0.00011712976730683108, "loss": 2.236, "step": 2331 }, { "epoch": 7.58, "learning_rate": 0.00011693136755631528, "loss": 2.2304, "step": 2334 }, { "epoch": 7.59, "learning_rate": 0.00011673289916902539, "loss": 2.2346, "step": 2337 }, { "epoch": 7.6, "learning_rate": 0.00011653436294951724, "loss": 2.2353, "step": 2340 }, { "epoch": 7.61, "learning_rate": 0.00011633575970262152, "loss": 2.2511, "step": 2343 }, { "epoch": 7.62, "learning_rate": 0.0001161370902334408, "loss": 2.2557, "step": 2346 }, { "epoch": 7.63, "learning_rate": 0.00011593835534734596, "loss": 2.2501, "step": 2349 }, { "epoch": 7.64, "learning_rate": 0.00011573955584997318, "loss": 2.2231, "step": 2352 }, { "epoch": 7.65, "learning_rate": 0.00011554069254722051, "loss": 2.1851, "step": 2355 }, { "epoch": 7.66, "learning_rate": 0.00011534176624524464, "loss": 2.2573, "step": 2358 }, { "epoch": 7.67, "learning_rate": 0.00011514277775045768, "loss": 2.2314, "step": 2361 }, { "epoch": 7.68, "learning_rate": 0.00011494372786952384, "loss": 2.1805, "step": 2364 }, { "epoch": 7.69, "learning_rate": 0.00011474461740935621, "loss": 2.2441, "step": 2367 }, { "epoch": 7.69, "learning_rate": 0.0001145454471771134, "loss": 2.2018, "step": 2370 }, { "epoch": 7.7, "learning_rate": 0.0001143462179801964, "loss": 2.1672, "step": 2373 }, { "epoch": 7.71, "learning_rate": 0.00011414693062624515, "loss": 2.2151, "step": 2376 }, { "epoch": 7.72, "learning_rate": 0.00011394758592313543, "loss": 2.2059, "step": 2379 }, { "epoch": 7.73, "learning_rate": 0.00011374818467897541, "loss": 2.2516, "step": 2382 }, { "epoch": 7.74, "learning_rate": 0.00011354872770210256, "loss": 2.1991, "step": 2385 }, { "epoch": 7.75, "learning_rate": 0.00011334921580108027, "loss": 2.2307, "step": 2388 }, { "epoch": 7.76, "learning_rate": 0.00011314964978469445, "loss": 2.1478, "step": 2391 }, { "epoch": 7.77, "learning_rate": 0.00011295003046195058, "loss": 2.1494, "step": 2394 }, { "epoch": 7.78, "learning_rate": 0.00011275035864207017, "loss": 2.289, "step": 2397 }, { "epoch": 7.79, "learning_rate": 0.00011255063513448743, "loss": 2.1709, "step": 2400 }, { "epoch": 7.8, "learning_rate": 0.00011235086074884622, "loss": 2.2586, "step": 2403 }, { "epoch": 7.81, "learning_rate": 0.00011215103629499661, "loss": 2.2279, "step": 2406 }, { "epoch": 7.82, "learning_rate": 0.00011195116258299169, "loss": 2.2403, "step": 2409 }, { "epoch": 7.83, "learning_rate": 0.00011175124042308416, "loss": 2.2453, "step": 2412 }, { "epoch": 7.84, "learning_rate": 0.00011155127062572314, "loss": 2.2293, "step": 2415 }, { "epoch": 7.85, "learning_rate": 0.00011135125400155091, "loss": 2.2598, "step": 2418 }, { "epoch": 7.86, "learning_rate": 0.00011115119136139951, "loss": 2.2331, "step": 2421 }, { "epoch": 7.87, "learning_rate": 0.00011095108351628758, "loss": 2.2416, "step": 2424 }, { "epoch": 7.88, "learning_rate": 0.00011075093127741695, "loss": 2.1992, "step": 2427 }, { "epoch": 7.89, "learning_rate": 0.00011055073545616952, "loss": 2.1727, "step": 2430 }, { "epoch": 7.9, "learning_rate": 0.0001103504968641037, "loss": 2.2371, "step": 2433 }, { "epoch": 7.91, "learning_rate": 0.00011015021631295149, "loss": 2.23, "step": 2436 }, { "epoch": 7.92, "learning_rate": 0.00010994989461461476, "loss": 2.1677, "step": 2439 }, { "epoch": 7.93, "learning_rate": 0.00010974953258116238, "loss": 2.2252, "step": 2442 }, { "epoch": 7.94, "learning_rate": 0.00010954913102482664, "loss": 2.2119, "step": 2445 }, { "epoch": 7.95, "learning_rate": 0.000109348690758, "loss": 2.2567, "step": 2448 }, { "epoch": 7.96, "learning_rate": 0.00010914821259323202, "loss": 2.2209, "step": 2451 }, { "epoch": 7.97, "learning_rate": 0.00010894769734322567, "loss": 2.2701, "step": 2454 }, { "epoch": 7.98, "learning_rate": 0.00010874714582083438, "loss": 2.1552, "step": 2457 }, { "epoch": 7.99, "learning_rate": 0.00010854655883905869, "loss": 2.1527, "step": 2460 }, { "epoch": 8.0, "learning_rate": 0.0001083459372110427, "loss": 2.2013, "step": 2463 }, { "epoch": 8.0, "eval_accuracy": 0.6309924029431118, "eval_loss": 2.1445603370666504, "eval_runtime": 16.3353, "eval_samples_per_second": 134.494, "eval_steps_per_second": 67.278, "step": 2464 }, { "epoch": 8.01, "learning_rate": 0.00010814528175007108, "loss": 2.22, "step": 2466 }, { "epoch": 8.02, "learning_rate": 0.0001079445932695657, "loss": 2.2225, "step": 2469 }, { "epoch": 8.03, "learning_rate": 0.00010774387258308217, "loss": 2.2667, "step": 2472 }, { "epoch": 8.04, "learning_rate": 0.00010754312050430668, "loss": 2.2468, "step": 2475 }, { "epoch": 8.05, "learning_rate": 0.00010734233784705276, "loss": 2.2416, "step": 2478 }, { "epoch": 8.06, "learning_rate": 0.00010714152542525781, "loss": 2.1588, "step": 2481 }, { "epoch": 8.06, "learning_rate": 0.00010694068405297997, "loss": 2.2093, "step": 2484 }, { "epoch": 8.07, "learning_rate": 0.00010673981454439463, "loss": 2.2511, "step": 2487 }, { "epoch": 8.08, "learning_rate": 0.00010653891771379134, "loss": 2.2265, "step": 2490 }, { "epoch": 8.09, "learning_rate": 0.00010633799437557039, "loss": 2.2257, "step": 2493 }, { "epoch": 8.1, "learning_rate": 0.00010613704534423949, "loss": 2.2588, "step": 2496 }, { "epoch": 8.11, "learning_rate": 0.00010593607143441053, "loss": 2.1435, "step": 2499 }, { "epoch": 8.12, "learning_rate": 0.00010573507346079625, "loss": 2.1494, "step": 2502 }, { "epoch": 8.13, "learning_rate": 0.00010553405223820693, "loss": 2.2067, "step": 2505 }, { "epoch": 8.14, "learning_rate": 0.00010533300858154715, "loss": 2.2174, "step": 2508 }, { "epoch": 8.15, "learning_rate": 0.00010513194330581233, "loss": 2.168, "step": 2511 }, { "epoch": 8.16, "learning_rate": 0.00010493085722608562, "loss": 2.2395, "step": 2514 }, { "epoch": 8.17, "learning_rate": 0.00010472975115753452, "loss": 2.1584, "step": 2517 }, { "epoch": 8.18, "learning_rate": 0.00010452862591540742, "loss": 2.1738, "step": 2520 }, { "epoch": 8.19, "learning_rate": 0.0001043274823150306, "loss": 2.2533, "step": 2523 }, { "epoch": 8.2, "learning_rate": 0.00010412632117180471, "loss": 2.308, "step": 2526 }, { "epoch": 8.21, "learning_rate": 0.00010392514330120145, "loss": 2.1664, "step": 2529 }, { "epoch": 8.22, "learning_rate": 0.00010372394951876043, "loss": 2.2669, "step": 2532 }, { "epoch": 8.23, "learning_rate": 0.00010352274064008567, "loss": 2.1636, "step": 2535 }, { "epoch": 8.24, "learning_rate": 0.00010332151748084242, "loss": 2.2729, "step": 2538 }, { "epoch": 8.25, "learning_rate": 0.00010312028085675391, "loss": 2.2097, "step": 2541 }, { "epoch": 8.26, "learning_rate": 0.00010291903158359783, "loss": 2.2306, "step": 2544 }, { "epoch": 8.27, "learning_rate": 0.0001027177704772032, "loss": 2.1675, "step": 2547 }, { "epoch": 8.28, "learning_rate": 0.00010251649835344696, "loss": 2.266, "step": 2550 }, { "epoch": 8.29, "learning_rate": 0.0001023152160282508, "loss": 2.2716, "step": 2553 }, { "epoch": 8.3, "learning_rate": 0.00010211392431757773, "loss": 2.2013, "step": 2556 }, { "epoch": 8.31, "learning_rate": 0.00010191262403742878, "loss": 2.1526, "step": 2559 }, { "epoch": 8.32, "learning_rate": 0.00010171131600383974, "loss": 2.1521, "step": 2562 }, { "epoch": 8.33, "learning_rate": 0.00010151000103287784, "loss": 2.2781, "step": 2565 }, { "epoch": 8.34, "learning_rate": 0.00010130867994063839, "loss": 2.1813, "step": 2568 }, { "epoch": 8.35, "learning_rate": 0.00010110735354324159, "loss": 2.2153, "step": 2571 }, { "epoch": 8.36, "learning_rate": 0.00010090602265682906, "loss": 2.2226, "step": 2574 }, { "epoch": 8.37, "learning_rate": 0.00010070468809756068, "loss": 2.2177, "step": 2577 }, { "epoch": 8.38, "learning_rate": 0.00010050335068161123, "loss": 2.186, "step": 2580 }, { "epoch": 8.39, "learning_rate": 0.00010030201122516696, "loss": 2.2026, "step": 2583 }, { "epoch": 8.4, "learning_rate": 0.00010010067054442251, "loss": 2.2229, "step": 2586 }, { "epoch": 8.41, "learning_rate": 9.989932945557751e-05, "loss": 2.2416, "step": 2589 }, { "epoch": 8.42, "learning_rate": 9.969798877483308e-05, "loss": 2.2854, "step": 2592 }, { "epoch": 8.43, "learning_rate": 9.949664931838882e-05, "loss": 2.158, "step": 2595 }, { "epoch": 8.44, "learning_rate": 9.929531190243932e-05, "loss": 2.2394, "step": 2598 }, { "epoch": 8.44, "learning_rate": 9.909397734317095e-05, "loss": 2.1703, "step": 2601 }, { "epoch": 8.45, "learning_rate": 9.889264645675843e-05, "loss": 2.2031, "step": 2604 }, { "epoch": 8.46, "learning_rate": 9.869132005936163e-05, "loss": 2.2224, "step": 2607 }, { "epoch": 8.47, "learning_rate": 9.848999896712217e-05, "loss": 2.2693, "step": 2610 }, { "epoch": 8.48, "learning_rate": 9.82886839961603e-05, "loss": 2.2381, "step": 2613 }, { "epoch": 8.49, "learning_rate": 9.808737596257121e-05, "loss": 2.1839, "step": 2616 }, { "epoch": 8.5, "learning_rate": 9.788607568242229e-05, "loss": 2.129, "step": 2619 }, { "epoch": 8.51, "learning_rate": 9.768478397174922e-05, "loss": 2.121, "step": 2622 }, { "epoch": 8.52, "learning_rate": 9.748350164655306e-05, "loss": 2.2323, "step": 2625 }, { "epoch": 8.53, "learning_rate": 9.728222952279684e-05, "loss": 2.2613, "step": 2628 }, { "epoch": 8.54, "learning_rate": 9.708096841640222e-05, "loss": 2.1113, "step": 2631 }, { "epoch": 8.55, "learning_rate": 9.687971914324607e-05, "loss": 2.1729, "step": 2634 }, { "epoch": 8.56, "learning_rate": 9.667848251915758e-05, "loss": 2.2271, "step": 2637 }, { "epoch": 8.57, "learning_rate": 9.647725935991436e-05, "loss": 2.2319, "step": 2640 }, { "epoch": 8.58, "learning_rate": 9.627605048123959e-05, "loss": 2.2244, "step": 2643 }, { "epoch": 8.59, "learning_rate": 9.607485669879857e-05, "loss": 2.1239, "step": 2646 }, { "epoch": 8.6, "learning_rate": 9.587367882819532e-05, "loss": 2.2429, "step": 2649 }, { "epoch": 8.61, "learning_rate": 9.567251768496938e-05, "loss": 2.1936, "step": 2652 }, { "epoch": 8.62, "learning_rate": 9.547137408459257e-05, "loss": 2.2038, "step": 2655 }, { "epoch": 8.63, "learning_rate": 9.52702488424655e-05, "loss": 2.1978, "step": 2658 }, { "epoch": 8.64, "learning_rate": 9.506914277391439e-05, "loss": 2.1977, "step": 2661 }, { "epoch": 8.65, "learning_rate": 9.486805669418769e-05, "loss": 2.1772, "step": 2664 }, { "epoch": 8.66, "learning_rate": 9.466699141845287e-05, "loss": 2.1929, "step": 2667 }, { "epoch": 8.67, "learning_rate": 9.446594776179306e-05, "loss": 2.2712, "step": 2670 }, { "epoch": 8.68, "learning_rate": 9.426492653920375e-05, "loss": 2.2606, "step": 2673 }, { "epoch": 8.69, "learning_rate": 9.406392856558949e-05, "loss": 2.1655, "step": 2676 }, { "epoch": 8.7, "learning_rate": 9.386295465576053e-05, "loss": 2.1544, "step": 2679 }, { "epoch": 8.71, "learning_rate": 9.366200562442963e-05, "loss": 2.2016, "step": 2682 }, { "epoch": 8.72, "learning_rate": 9.346108228620868e-05, "loss": 2.3044, "step": 2685 }, { "epoch": 8.73, "learning_rate": 9.326018545560542e-05, "loss": 2.2102, "step": 2688 }, { "epoch": 8.74, "learning_rate": 9.305931594702007e-05, "loss": 2.1511, "step": 2691 }, { "epoch": 8.75, "learning_rate": 9.28584745747422e-05, "loss": 2.212, "step": 2694 }, { "epoch": 8.76, "learning_rate": 9.265766215294725e-05, "loss": 2.1871, "step": 2697 }, { "epoch": 8.77, "learning_rate": 9.245687949569332e-05, "loss": 2.226, "step": 2700 }, { "epoch": 8.78, "learning_rate": 9.225612741691788e-05, "loss": 2.1323, "step": 2703 }, { "epoch": 8.79, "learning_rate": 9.205540673043434e-05, "loss": 2.1258, "step": 2706 }, { "epoch": 8.8, "learning_rate": 9.185471824992891e-05, "loss": 2.1963, "step": 2709 }, { "epoch": 8.81, "learning_rate": 9.165406278895732e-05, "loss": 2.2423, "step": 2712 }, { "epoch": 8.81, "learning_rate": 9.145344116094134e-05, "loss": 2.1678, "step": 2715 }, { "epoch": 8.82, "learning_rate": 9.125285417916563e-05, "loss": 2.196, "step": 2718 }, { "epoch": 8.83, "learning_rate": 9.105230265677437e-05, "loss": 2.1637, "step": 2721 }, { "epoch": 8.84, "learning_rate": 9.085178740676803e-05, "loss": 2.1019, "step": 2724 }, { "epoch": 8.85, "learning_rate": 9.065130924199998e-05, "loss": 2.1651, "step": 2727 }, { "epoch": 8.86, "learning_rate": 9.045086897517337e-05, "loss": 2.2656, "step": 2730 }, { "epoch": 8.87, "learning_rate": 9.025046741883764e-05, "loss": 2.2224, "step": 2733 }, { "epoch": 8.88, "learning_rate": 9.005010538538527e-05, "loss": 2.1764, "step": 2736 }, { "epoch": 8.89, "learning_rate": 8.984978368704855e-05, "loss": 2.1928, "step": 2739 }, { "epoch": 8.9, "learning_rate": 8.964950313589633e-05, "loss": 2.111, "step": 2742 }, { "epoch": 8.91, "learning_rate": 8.944926454383049e-05, "loss": 2.2286, "step": 2745 }, { "epoch": 8.92, "learning_rate": 8.924906872258306e-05, "loss": 2.18, "step": 2748 }, { "epoch": 8.93, "learning_rate": 8.904891648371244e-05, "loss": 2.1869, "step": 2751 }, { "epoch": 8.94, "learning_rate": 8.884880863860051e-05, "loss": 2.2054, "step": 2754 }, { "epoch": 8.95, "learning_rate": 8.864874599844911e-05, "loss": 2.1351, "step": 2757 }, { "epoch": 8.96, "learning_rate": 8.84487293742769e-05, "loss": 2.2412, "step": 2760 }, { "epoch": 8.97, "learning_rate": 8.824875957691588e-05, "loss": 2.2394, "step": 2763 }, { "epoch": 8.98, "learning_rate": 8.804883741700833e-05, "loss": 2.1228, "step": 2766 }, { "epoch": 8.99, "learning_rate": 8.78489637050034e-05, "loss": 2.1862, "step": 2769 }, { "epoch": 9.0, "learning_rate": 8.764913925115381e-05, "loss": 2.1463, "step": 2772 }, { "epoch": 9.0, "eval_accuracy": 0.6357306777396295, "eval_loss": 2.106170654296875, "eval_runtime": 16.2999, "eval_samples_per_second": 134.786, "eval_steps_per_second": 67.424, "step": 2772 }, { "epoch": 9.01, "learning_rate": 8.744936486551262e-05, "loss": 2.2427, "step": 2775 }, { "epoch": 9.02, "learning_rate": 8.724964135792988e-05, "loss": 2.105, "step": 2778 }, { "epoch": 9.03, "learning_rate": 8.70499695380494e-05, "loss": 2.2257, "step": 2781 }, { "epoch": 9.04, "learning_rate": 8.685035021530554e-05, "loss": 2.2433, "step": 2784 }, { "epoch": 9.05, "learning_rate": 8.665078419891977e-05, "loss": 2.1763, "step": 2787 }, { "epoch": 9.06, "learning_rate": 8.645127229789746e-05, "loss": 2.2108, "step": 2790 }, { "epoch": 9.07, "learning_rate": 8.625181532102463e-05, "loss": 2.115, "step": 2793 }, { "epoch": 9.08, "learning_rate": 8.605241407686462e-05, "loss": 2.2288, "step": 2796 }, { "epoch": 9.09, "learning_rate": 8.585306937375486e-05, "loss": 2.1351, "step": 2799 }, { "epoch": 9.1, "learning_rate": 8.565378201980361e-05, "loss": 2.1875, "step": 2802 }, { "epoch": 9.11, "learning_rate": 8.545455282288661e-05, "loss": 2.1558, "step": 2805 }, { "epoch": 9.12, "learning_rate": 8.525538259064381e-05, "loss": 2.2156, "step": 2808 }, { "epoch": 9.13, "learning_rate": 8.505627213047617e-05, "loss": 2.1982, "step": 2811 }, { "epoch": 9.14, "learning_rate": 8.485722224954237e-05, "loss": 2.1447, "step": 2814 }, { "epoch": 9.15, "learning_rate": 8.465823375475537e-05, "loss": 2.2311, "step": 2817 }, { "epoch": 9.16, "learning_rate": 8.445930745277953e-05, "loss": 2.1685, "step": 2820 }, { "epoch": 9.17, "learning_rate": 8.426044415002684e-05, "loss": 2.1237, "step": 2823 }, { "epoch": 9.18, "learning_rate": 8.406164465265406e-05, "loss": 2.1082, "step": 2826 }, { "epoch": 9.19, "learning_rate": 8.386290976655924e-05, "loss": 2.1352, "step": 2829 }, { "epoch": 9.19, "learning_rate": 8.366424029737853e-05, "loss": 2.1588, "step": 2832 }, { "epoch": 9.2, "learning_rate": 8.346563705048277e-05, "loss": 2.2979, "step": 2835 }, { "epoch": 9.21, "learning_rate": 8.326710083097462e-05, "loss": 2.1507, "step": 2838 }, { "epoch": 9.22, "learning_rate": 8.306863244368474e-05, "loss": 2.127, "step": 2841 }, { "epoch": 9.23, "learning_rate": 8.287023269316894e-05, "loss": 2.1869, "step": 2844 }, { "epoch": 9.24, "learning_rate": 8.267190238370482e-05, "loss": 2.1259, "step": 2847 }, { "epoch": 9.25, "learning_rate": 8.247364231928837e-05, "loss": 2.2649, "step": 2850 }, { "epoch": 9.26, "learning_rate": 8.227545330363087e-05, "loss": 2.1888, "step": 2853 }, { "epoch": 9.27, "learning_rate": 8.207733614015566e-05, "loss": 2.1949, "step": 2856 }, { "epoch": 9.28, "learning_rate": 8.18792916319947e-05, "loss": 2.1379, "step": 2859 }, { "epoch": 9.29, "learning_rate": 8.168132058198546e-05, "loss": 2.1585, "step": 2862 }, { "epoch": 9.3, "learning_rate": 8.148342379266759e-05, "loss": 2.1941, "step": 2865 }, { "epoch": 9.31, "learning_rate": 8.128560206627974e-05, "loss": 2.1447, "step": 2868 }, { "epoch": 9.32, "learning_rate": 8.108785620475624e-05, "loss": 2.1413, "step": 2871 }, { "epoch": 9.33, "learning_rate": 8.089018700972393e-05, "loss": 2.25, "step": 2874 }, { "epoch": 9.34, "learning_rate": 8.069259528249882e-05, "loss": 2.2048, "step": 2877 }, { "epoch": 9.35, "learning_rate": 8.049508182408284e-05, "loss": 2.1432, "step": 2880 }, { "epoch": 9.36, "learning_rate": 8.029764743516068e-05, "loss": 2.2312, "step": 2883 }, { "epoch": 9.37, "learning_rate": 8.01002929160965e-05, "loss": 2.1929, "step": 2886 }, { "epoch": 9.38, "learning_rate": 7.990301906693069e-05, "loss": 2.1687, "step": 2889 }, { "epoch": 9.39, "learning_rate": 7.970582668737652e-05, "loss": 2.1176, "step": 2892 }, { "epoch": 9.4, "learning_rate": 7.950871657681716e-05, "loss": 2.1793, "step": 2895 }, { "epoch": 9.41, "learning_rate": 7.931168953430213e-05, "loss": 2.1538, "step": 2898 }, { "epoch": 9.42, "learning_rate": 7.91147463585443e-05, "loss": 2.1519, "step": 2901 }, { "epoch": 9.43, "learning_rate": 7.891788784791655e-05, "loss": 2.1821, "step": 2904 }, { "epoch": 9.44, "learning_rate": 7.872111480044847e-05, "loss": 2.145, "step": 2907 }, { "epoch": 9.45, "learning_rate": 7.852442801382322e-05, "loss": 2.1574, "step": 2910 }, { "epoch": 9.46, "learning_rate": 7.832782828537437e-05, "loss": 2.2808, "step": 2913 }, { "epoch": 9.47, "learning_rate": 7.813131641208245e-05, "loss": 2.1604, "step": 2916 }, { "epoch": 9.48, "learning_rate": 7.793489319057195e-05, "loss": 2.1786, "step": 2919 }, { "epoch": 9.49, "learning_rate": 7.773855941710786e-05, "loss": 2.2453, "step": 2922 }, { "epoch": 9.5, "learning_rate": 7.754231588759265e-05, "loss": 2.2529, "step": 2925 }, { "epoch": 9.51, "learning_rate": 7.734616339756291e-05, "loss": 2.1199, "step": 2928 }, { "epoch": 9.52, "learning_rate": 7.715010274218625e-05, "loss": 2.2108, "step": 2931 }, { "epoch": 9.53, "learning_rate": 7.695413471625792e-05, "loss": 2.2136, "step": 2934 }, { "epoch": 9.54, "learning_rate": 7.675826011419766e-05, "loss": 2.1859, "step": 2937 }, { "epoch": 9.55, "learning_rate": 7.656247973004656e-05, "loss": 2.1492, "step": 2940 }, { "epoch": 9.56, "learning_rate": 7.63667943574637e-05, "loss": 2.2054, "step": 2943 }, { "epoch": 9.56, "learning_rate": 7.617120478972297e-05, "loss": 2.1295, "step": 2946 }, { "epoch": 9.57, "learning_rate": 7.597571181971006e-05, "loss": 2.0959, "step": 2949 }, { "epoch": 9.58, "learning_rate": 7.578031623991886e-05, "loss": 2.2796, "step": 2952 }, { "epoch": 9.59, "learning_rate": 7.558501884244857e-05, "loss": 2.0984, "step": 2955 }, { "epoch": 9.6, "learning_rate": 7.538982041900033e-05, "loss": 2.2107, "step": 2958 }, { "epoch": 9.61, "learning_rate": 7.519472176087414e-05, "loss": 2.1768, "step": 2961 }, { "epoch": 9.62, "learning_rate": 7.49997236589654e-05, "loss": 2.1817, "step": 2964 }, { "epoch": 9.63, "learning_rate": 7.480482690376207e-05, "loss": 2.1886, "step": 2967 }, { "epoch": 9.64, "learning_rate": 7.461003228534115e-05, "loss": 2.2048, "step": 2970 }, { "epoch": 9.65, "learning_rate": 7.441534059336563e-05, "loss": 2.1702, "step": 2973 }, { "epoch": 9.66, "learning_rate": 7.422075261708125e-05, "loss": 2.1699, "step": 2976 }, { "epoch": 9.67, "learning_rate": 7.402626914531328e-05, "loss": 2.1161, "step": 2979 }, { "epoch": 9.68, "learning_rate": 7.383189096646335e-05, "loss": 2.1665, "step": 2982 }, { "epoch": 9.69, "learning_rate": 7.363761886850633e-05, "loss": 2.196, "step": 2985 }, { "epoch": 9.7, "learning_rate": 7.344345363898697e-05, "loss": 2.1785, "step": 2988 }, { "epoch": 9.71, "learning_rate": 7.324939606501685e-05, "loss": 2.1046, "step": 2991 }, { "epoch": 9.72, "learning_rate": 7.305544693327106e-05, "loss": 2.1544, "step": 2994 }, { "epoch": 9.73, "learning_rate": 7.286160702998515e-05, "loss": 2.1705, "step": 2997 }, { "epoch": 9.74, "learning_rate": 7.266787714095182e-05, "loss": 2.2029, "step": 3000 }, { "epoch": 9.75, "learning_rate": 7.247425805151788e-05, "loss": 2.1483, "step": 3003 }, { "epoch": 9.76, "learning_rate": 7.228075054658096e-05, "loss": 2.1243, "step": 3006 }, { "epoch": 9.77, "learning_rate": 7.208735541058622e-05, "loss": 2.0956, "step": 3009 }, { "epoch": 9.78, "learning_rate": 7.189407342752345e-05, "loss": 2.1788, "step": 3012 }, { "epoch": 9.79, "learning_rate": 7.170090538092367e-05, "loss": 2.2902, "step": 3015 }, { "epoch": 9.8, "learning_rate": 7.150785205385596e-05, "loss": 2.0941, "step": 3018 }, { "epoch": 9.81, "learning_rate": 7.131491422892454e-05, "loss": 2.1134, "step": 3021 }, { "epoch": 9.82, "learning_rate": 7.112209268826517e-05, "loss": 2.1195, "step": 3024 }, { "epoch": 9.83, "learning_rate": 7.092938821354238e-05, "loss": 2.2264, "step": 3027 }, { "epoch": 9.84, "learning_rate": 7.073680158594609e-05, "loss": 2.205, "step": 3030 }, { "epoch": 9.85, "learning_rate": 7.054433358618847e-05, "loss": 2.1962, "step": 3033 }, { "epoch": 9.86, "learning_rate": 7.035198499450071e-05, "loss": 2.1881, "step": 3036 }, { "epoch": 9.87, "learning_rate": 7.015975659063017e-05, "loss": 2.1807, "step": 3039 }, { "epoch": 9.88, "learning_rate": 6.996764915383681e-05, "loss": 2.1038, "step": 3042 }, { "epoch": 9.89, "learning_rate": 6.97756634628902e-05, "loss": 2.1783, "step": 3045 }, { "epoch": 9.9, "learning_rate": 6.95838002960665e-05, "loss": 2.1281, "step": 3048 }, { "epoch": 9.91, "learning_rate": 6.939206043114506e-05, "loss": 2.1633, "step": 3051 }, { "epoch": 9.92, "learning_rate": 6.920044464540543e-05, "loss": 2.1237, "step": 3054 }, { "epoch": 9.93, "learning_rate": 6.900895371562419e-05, "loss": 2.1732, "step": 3057 }, { "epoch": 9.94, "learning_rate": 6.881758841807176e-05, "loss": 2.139, "step": 3060 }, { "epoch": 9.94, "learning_rate": 6.862634952850926e-05, "loss": 2.1296, "step": 3063 }, { "epoch": 9.95, "learning_rate": 6.843523782218534e-05, "loss": 2.1914, "step": 3066 }, { "epoch": 9.96, "learning_rate": 6.824425407383315e-05, "loss": 2.1584, "step": 3069 }, { "epoch": 9.97, "learning_rate": 6.805339905766706e-05, "loss": 2.128, "step": 3072 }, { "epoch": 9.98, "learning_rate": 6.786267354737955e-05, "loss": 2.082, "step": 3075 }, { "epoch": 9.99, "learning_rate": 6.767207831613828e-05, "loss": 2.0882, "step": 3078 }, { "epoch": 10.0, "eval_accuracy": 0.636978362829467, "eval_loss": 2.084742307662964, "eval_runtime": 16.3444, "eval_samples_per_second": 134.419, "eval_steps_per_second": 67.24, "step": 3080 }, { "epoch": 10.0, "learning_rate": 6.748161413658256e-05, "loss": 2.0895, "step": 3081 }, { "epoch": 10.01, "learning_rate": 6.729128178082058e-05, "loss": 2.143, "step": 3084 }, { "epoch": 10.02, "learning_rate": 6.71010820204261e-05, "loss": 2.1523, "step": 3087 }, { "epoch": 10.03, "learning_rate": 6.691101562643534e-05, "loss": 2.1812, "step": 3090 }, { "epoch": 10.04, "learning_rate": 6.672108336934386e-05, "loss": 2.1807, "step": 3093 }, { "epoch": 10.05, "learning_rate": 6.653128601910357e-05, "loss": 2.1916, "step": 3096 }, { "epoch": 10.06, "learning_rate": 6.63416243451194e-05, "loss": 2.2036, "step": 3099 }, { "epoch": 10.07, "learning_rate": 6.615209911624623e-05, "loss": 2.1112, "step": 3102 }, { "epoch": 10.08, "learning_rate": 6.596271110078591e-05, "loss": 2.0984, "step": 3105 }, { "epoch": 10.09, "learning_rate": 6.577346106648399e-05, "loss": 2.2862, "step": 3108 }, { "epoch": 10.1, "learning_rate": 6.558434978052667e-05, "loss": 2.1379, "step": 3111 }, { "epoch": 10.11, "learning_rate": 6.539537800953777e-05, "loss": 2.1475, "step": 3114 }, { "epoch": 10.12, "learning_rate": 6.520654651957543e-05, "loss": 2.144, "step": 3117 }, { "epoch": 10.13, "learning_rate": 6.50178560761292e-05, "loss": 2.1383, "step": 3120 }, { "epoch": 10.14, "learning_rate": 6.482930744411677e-05, "loss": 2.0835, "step": 3123 }, { "epoch": 10.15, "learning_rate": 6.464090138788102e-05, "loss": 2.0889, "step": 3126 }, { "epoch": 10.16, "learning_rate": 6.445263867118679e-05, "loss": 2.2135, "step": 3129 }, { "epoch": 10.17, "learning_rate": 6.426452005721797e-05, "loss": 2.1301, "step": 3132 }, { "epoch": 10.18, "learning_rate": 6.407654630857416e-05, "loss": 2.0498, "step": 3135 }, { "epoch": 10.19, "learning_rate": 6.388871818726774e-05, "loss": 2.0766, "step": 3138 }, { "epoch": 10.2, "learning_rate": 6.370103645472072e-05, "loss": 2.195, "step": 3141 }, { "epoch": 10.21, "learning_rate": 6.351350187176176e-05, "loss": 2.2205, "step": 3144 }, { "epoch": 10.22, "learning_rate": 6.332611519862284e-05, "loss": 2.1172, "step": 3147 }, { "epoch": 10.23, "learning_rate": 6.313887719493657e-05, "loss": 2.1688, "step": 3150 }, { "epoch": 10.24, "learning_rate": 6.295178861973267e-05, "loss": 2.0986, "step": 3153 }, { "epoch": 10.25, "learning_rate": 6.27648502314352e-05, "loss": 2.0658, "step": 3156 }, { "epoch": 10.26, "learning_rate": 6.257806278785937e-05, "loss": 2.1681, "step": 3159 }, { "epoch": 10.27, "learning_rate": 6.239142704620853e-05, "loss": 2.2028, "step": 3162 }, { "epoch": 10.28, "learning_rate": 6.220494376307094e-05, "loss": 2.1707, "step": 3165 }, { "epoch": 10.29, "learning_rate": 6.201861369441697e-05, "loss": 2.2144, "step": 3168 }, { "epoch": 10.3, "learning_rate": 6.183243759559579e-05, "loss": 2.2155, "step": 3171 }, { "epoch": 10.31, "learning_rate": 6.164641622133241e-05, "loss": 2.1628, "step": 3174 }, { "epoch": 10.31, "learning_rate": 6.146055032572466e-05, "loss": 2.1457, "step": 3177 }, { "epoch": 10.32, "learning_rate": 6.127484066224005e-05, "loss": 2.079, "step": 3180 }, { "epoch": 10.33, "learning_rate": 6.108928798371272e-05, "loss": 2.1211, "step": 3183 }, { "epoch": 10.34, "learning_rate": 6.090389304234052e-05, "loss": 2.1723, "step": 3186 }, { "epoch": 10.35, "learning_rate": 6.0718656589681764e-05, "loss": 2.1723, "step": 3189 }, { "epoch": 10.36, "learning_rate": 6.053357937665237e-05, "loss": 2.1179, "step": 3192 }, { "epoch": 10.37, "learning_rate": 6.034866215352262e-05, "loss": 2.2066, "step": 3195 }, { "epoch": 10.38, "learning_rate": 6.016390566991429e-05, "loss": 2.1562, "step": 3198 }, { "epoch": 10.39, "learning_rate": 5.997931067479753e-05, "loss": 2.1374, "step": 3201 }, { "epoch": 10.4, "learning_rate": 5.979487791648789e-05, "loss": 2.1595, "step": 3204 }, { "epoch": 10.41, "learning_rate": 5.961060814264321e-05, "loss": 2.194, "step": 3207 }, { "epoch": 10.42, "learning_rate": 5.942650210026055e-05, "loss": 2.1749, "step": 3210 }, { "epoch": 10.43, "learning_rate": 5.9242560535673344e-05, "loss": 2.207, "step": 3213 }, { "epoch": 10.44, "learning_rate": 5.905878419454821e-05, "loss": 2.1641, "step": 3216 }, { "epoch": 10.45, "learning_rate": 5.8875173821881904e-05, "loss": 2.2015, "step": 3219 }, { "epoch": 10.46, "learning_rate": 5.869173016199858e-05, "loss": 2.0588, "step": 3222 }, { "epoch": 10.47, "learning_rate": 5.850845395854636e-05, "loss": 2.1809, "step": 3225 }, { "epoch": 10.48, "learning_rate": 5.8325345954494633e-05, "loss": 2.0862, "step": 3228 }, { "epoch": 10.49, "learning_rate": 5.814240689213086e-05, "loss": 2.1122, "step": 3231 }, { "epoch": 10.5, "learning_rate": 5.795963751305777e-05, "loss": 2.1289, "step": 3234 }, { "epoch": 10.51, "learning_rate": 5.77770385581901e-05, "loss": 2.1519, "step": 3237 }, { "epoch": 10.52, "learning_rate": 5.759461076775177e-05, "loss": 2.1731, "step": 3240 }, { "epoch": 10.53, "learning_rate": 5.7412354881272865e-05, "loss": 2.1847, "step": 3243 }, { "epoch": 10.54, "learning_rate": 5.7230271637586555e-05, "loss": 2.2063, "step": 3246 }, { "epoch": 10.55, "learning_rate": 5.7048361774826086e-05, "loss": 2.1409, "step": 3249 }, { "epoch": 10.56, "learning_rate": 5.686662603042201e-05, "loss": 2.0635, "step": 3252 }, { "epoch": 10.57, "learning_rate": 5.668506514109887e-05, "loss": 2.0779, "step": 3255 }, { "epoch": 10.58, "learning_rate": 5.6503679842872506e-05, "loss": 2.0536, "step": 3258 }, { "epoch": 10.59, "learning_rate": 5.6322470871046825e-05, "loss": 2.1569, "step": 3261 }, { "epoch": 10.6, "learning_rate": 5.6141438960211065e-05, "loss": 2.1513, "step": 3264 }, { "epoch": 10.61, "learning_rate": 5.596058484423656e-05, "loss": 2.1937, "step": 3267 }, { "epoch": 10.62, "learning_rate": 5.5779909256274035e-05, "loss": 2.1962, "step": 3270 }, { "epoch": 10.63, "learning_rate": 5.559941292875035e-05, "loss": 2.1182, "step": 3273 }, { "epoch": 10.64, "learning_rate": 5.5419096593365724e-05, "loss": 2.1865, "step": 3276 }, { "epoch": 10.65, "learning_rate": 5.523896098109079e-05, "loss": 2.158, "step": 3279 }, { "epoch": 10.66, "learning_rate": 5.505900682216354e-05, "loss": 2.0896, "step": 3282 }, { "epoch": 10.67, "learning_rate": 5.487923484608629e-05, "loss": 2.1242, "step": 3285 }, { "epoch": 10.68, "learning_rate": 5.469964578162288e-05, "loss": 2.1423, "step": 3288 }, { "epoch": 10.69, "learning_rate": 5.4520240356795725e-05, "loss": 2.114, "step": 3291 }, { "epoch": 10.69, "learning_rate": 5.4341019298882656e-05, "loss": 2.1531, "step": 3294 }, { "epoch": 10.7, "learning_rate": 5.416198333441423e-05, "loss": 2.1431, "step": 3297 }, { "epoch": 10.71, "learning_rate": 5.3983133189170686e-05, "loss": 2.0837, "step": 3300 }, { "epoch": 10.72, "learning_rate": 5.380446958817888e-05, "loss": 2.1647, "step": 3303 }, { "epoch": 10.73, "learning_rate": 5.362599325570945e-05, "loss": 2.1104, "step": 3306 }, { "epoch": 10.74, "learning_rate": 5.344770491527402e-05, "loss": 2.11, "step": 3309 }, { "epoch": 10.75, "learning_rate": 5.3269605289621947e-05, "loss": 2.1962, "step": 3312 }, { "epoch": 10.76, "learning_rate": 5.309169510073777e-05, "loss": 2.213, "step": 3315 }, { "epoch": 10.77, "learning_rate": 5.291397506983786e-05, "loss": 2.0556, "step": 3318 }, { "epoch": 10.78, "learning_rate": 5.273644591736793e-05, "loss": 2.2094, "step": 3321 }, { "epoch": 10.79, "learning_rate": 5.2559108362999796e-05, "loss": 2.0953, "step": 3324 }, { "epoch": 10.8, "learning_rate": 5.238196312562851e-05, "loss": 2.2436, "step": 3327 }, { "epoch": 10.81, "learning_rate": 5.220501092336966e-05, "loss": 2.1752, "step": 3330 }, { "epoch": 10.82, "learning_rate": 5.2028252473556226e-05, "loss": 2.1757, "step": 3333 }, { "epoch": 10.83, "learning_rate": 5.1851688492735705e-05, "loss": 2.193, "step": 3336 }, { "epoch": 10.84, "learning_rate": 5.167531969666735e-05, "loss": 2.1306, "step": 3339 }, { "epoch": 10.85, "learning_rate": 5.149914680031909e-05, "loss": 2.1043, "step": 3342 }, { "epoch": 10.86, "learning_rate": 5.132317051786468e-05, "loss": 2.1032, "step": 3345 }, { "epoch": 10.87, "learning_rate": 5.114739156268094e-05, "loss": 2.1205, "step": 3348 }, { "epoch": 10.88, "learning_rate": 5.097181064734475e-05, "loss": 2.1494, "step": 3351 }, { "epoch": 10.89, "learning_rate": 5.0796428483630074e-05, "loss": 2.1382, "step": 3354 }, { "epoch": 10.9, "learning_rate": 5.062124578250529e-05, "loss": 2.0946, "step": 3357 }, { "epoch": 10.91, "learning_rate": 5.04462632541301e-05, "loss": 2.0986, "step": 3360 }, { "epoch": 10.92, "learning_rate": 5.027148160785273e-05, "loss": 2.1342, "step": 3363 }, { "epoch": 10.93, "learning_rate": 5.009690155220715e-05, "loss": 2.1049, "step": 3366 }, { "epoch": 10.94, "learning_rate": 4.992252379491012e-05, "loss": 2.2269, "step": 3369 }, { "epoch": 10.95, "learning_rate": 4.974834904285822e-05, "loss": 2.1275, "step": 3372 }, { "epoch": 10.96, "learning_rate": 4.957437800212512e-05, "loss": 2.0984, "step": 3375 }, { "epoch": 10.97, "learning_rate": 4.940061137795876e-05, "loss": 2.1488, "step": 3378 }, { "epoch": 10.98, "learning_rate": 4.9227049874778306e-05, "loss": 2.1061, "step": 3381 }, { "epoch": 10.99, "learning_rate": 4.905369419617137e-05, "loss": 2.2105, "step": 3384 }, { "epoch": 11.0, "learning_rate": 4.888054504489142e-05, "loss": 2.1669, "step": 3387 }, { "epoch": 11.0, "eval_accuracy": 0.6398645281002108, "eval_loss": 2.068744421005249, "eval_runtime": 16.3144, "eval_samples_per_second": 134.666, "eval_steps_per_second": 67.364, "step": 3388 }, { "epoch": 11.01, "learning_rate": 4.870760312285445e-05, "loss": 2.1265, "step": 3390 }, { "epoch": 11.02, "learning_rate": 4.853486913113644e-05, "loss": 2.2385, "step": 3393 }, { "epoch": 11.03, "learning_rate": 4.836234376997056e-05, "loss": 2.0393, "step": 3396 }, { "epoch": 11.04, "learning_rate": 4.8190027738744134e-05, "loss": 2.0961, "step": 3399 }, { "epoch": 11.05, "learning_rate": 4.801792173599586e-05, "loss": 2.0916, "step": 3402 }, { "epoch": 11.06, "learning_rate": 4.784602645941314e-05, "loss": 2.1191, "step": 3405 }, { "epoch": 11.06, "learning_rate": 4.7674342605829094e-05, "loss": 2.0921, "step": 3408 }, { "epoch": 11.07, "learning_rate": 4.7502870871219675e-05, "loss": 2.0888, "step": 3411 }, { "epoch": 11.08, "learning_rate": 4.7331611950701096e-05, "loss": 2.1485, "step": 3414 }, { "epoch": 11.09, "learning_rate": 4.716056653852672e-05, "loss": 2.1266, "step": 3417 }, { "epoch": 11.1, "learning_rate": 4.698973532808443e-05, "loss": 2.1702, "step": 3420 }, { "epoch": 11.11, "learning_rate": 4.6819119011893805e-05, "loss": 2.1296, "step": 3423 }, { "epoch": 11.12, "learning_rate": 4.664871828160331e-05, "loss": 2.0754, "step": 3426 }, { "epoch": 11.13, "learning_rate": 4.647853382798736e-05, "loss": 2.1102, "step": 3429 }, { "epoch": 11.14, "learning_rate": 4.630856634094366e-05, "loss": 2.1222, "step": 3432 }, { "epoch": 11.15, "learning_rate": 4.613881650949044e-05, "loss": 2.1703, "step": 3435 }, { "epoch": 11.16, "learning_rate": 4.596928502176349e-05, "loss": 2.1288, "step": 3438 }, { "epoch": 11.17, "learning_rate": 4.579997256501355e-05, "loss": 2.0522, "step": 3441 }, { "epoch": 11.18, "learning_rate": 4.563087982560345e-05, "loss": 2.1225, "step": 3444 }, { "epoch": 11.19, "learning_rate": 4.546200748900525e-05, "loss": 2.068, "step": 3447 }, { "epoch": 11.2, "learning_rate": 4.529335623979757e-05, "loss": 2.0433, "step": 3450 }, { "epoch": 11.21, "learning_rate": 4.512492676166283e-05, "loss": 2.0679, "step": 3453 }, { "epoch": 11.22, "learning_rate": 4.49567197373844e-05, "loss": 2.1568, "step": 3456 }, { "epoch": 11.23, "learning_rate": 4.478873584884378e-05, "loss": 2.0446, "step": 3459 }, { "epoch": 11.24, "learning_rate": 4.4620975777018034e-05, "loss": 2.1532, "step": 3462 }, { "epoch": 11.25, "learning_rate": 4.44534402019769e-05, "loss": 2.1659, "step": 3465 }, { "epoch": 11.26, "learning_rate": 4.428612980287996e-05, "loss": 2.1492, "step": 3468 }, { "epoch": 11.27, "learning_rate": 4.411904525797408e-05, "loss": 2.1019, "step": 3471 }, { "epoch": 11.28, "learning_rate": 4.395218724459047e-05, "loss": 2.0283, "step": 3474 }, { "epoch": 11.29, "learning_rate": 4.3785556439142005e-05, "loss": 2.1538, "step": 3477 }, { "epoch": 11.3, "learning_rate": 4.361915351712059e-05, "loss": 2.1303, "step": 3480 }, { "epoch": 11.31, "learning_rate": 4.345297915309432e-05, "loss": 2.0312, "step": 3483 }, { "epoch": 11.32, "learning_rate": 4.3287034020704684e-05, "loss": 2.1296, "step": 3486 }, { "epoch": 11.33, "learning_rate": 4.3121318792663914e-05, "loss": 2.1551, "step": 3489 }, { "epoch": 11.34, "learning_rate": 4.295583414075234e-05, "loss": 2.1197, "step": 3492 }, { "epoch": 11.35, "learning_rate": 4.279058073581544e-05, "loss": 2.2175, "step": 3495 }, { "epoch": 11.36, "learning_rate": 4.2625559247761394e-05, "loss": 2.1445, "step": 3498 }, { "epoch": 11.37, "learning_rate": 4.246077034555819e-05, "loss": 2.1581, "step": 3501 }, { "epoch": 11.38, "learning_rate": 4.229621469723091e-05, "loss": 2.0796, "step": 3504 }, { "epoch": 11.39, "learning_rate": 4.2131892969859054e-05, "loss": 2.1374, "step": 3507 }, { "epoch": 11.4, "learning_rate": 4.196780582957396e-05, "loss": 2.1188, "step": 3510 }, { "epoch": 11.41, "learning_rate": 4.180395394155584e-05, "loss": 2.0809, "step": 3513 }, { "epoch": 11.42, "learning_rate": 4.1640337970031384e-05, "loss": 2.1211, "step": 3516 }, { "epoch": 11.43, "learning_rate": 4.1476958578270783e-05, "loss": 2.1566, "step": 3519 }, { "epoch": 11.44, "learning_rate": 4.1313816428585316e-05, "loss": 2.1824, "step": 3522 }, { "epoch": 11.44, "learning_rate": 4.1150912182324396e-05, "loss": 2.1873, "step": 3525 }, { "epoch": 11.45, "learning_rate": 4.098824649987304e-05, "loss": 2.1329, "step": 3528 }, { "epoch": 11.46, "learning_rate": 4.0825820040649246e-05, "loss": 2.1283, "step": 3531 }, { "epoch": 11.47, "learning_rate": 4.06636334631012e-05, "loss": 2.1378, "step": 3534 }, { "epoch": 11.48, "learning_rate": 4.0501687424704613e-05, "loss": 2.1189, "step": 3537 }, { "epoch": 11.49, "learning_rate": 4.033998258196019e-05, "loss": 2.1541, "step": 3540 }, { "epoch": 11.5, "learning_rate": 4.017851959039075e-05, "loss": 2.1264, "step": 3543 }, { "epoch": 11.51, "learning_rate": 4.001729910453872e-05, "loss": 2.0864, "step": 3546 }, { "epoch": 11.52, "learning_rate": 3.985632177796353e-05, "loss": 2.0457, "step": 3549 }, { "epoch": 11.53, "learning_rate": 3.9695588263238847e-05, "loss": 2.1076, "step": 3552 }, { "epoch": 11.54, "learning_rate": 3.953509921194991e-05, "loss": 2.1368, "step": 3555 }, { "epoch": 11.55, "learning_rate": 3.9374855274691035e-05, "loss": 2.1462, "step": 3558 }, { "epoch": 11.56, "learning_rate": 3.921485710106283e-05, "loss": 2.1003, "step": 3561 }, { "epoch": 11.57, "learning_rate": 3.9055105339669595e-05, "loss": 2.1287, "step": 3564 }, { "epoch": 11.58, "learning_rate": 3.889560063811679e-05, "loss": 2.144, "step": 3567 }, { "epoch": 11.59, "learning_rate": 3.873634364300835e-05, "loss": 2.1544, "step": 3570 }, { "epoch": 11.6, "learning_rate": 3.857733499994397e-05, "loss": 2.0732, "step": 3573 }, { "epoch": 11.61, "learning_rate": 3.841857535351657e-05, "loss": 2.1219, "step": 3576 }, { "epoch": 11.62, "learning_rate": 3.82600653473098e-05, "loss": 2.1629, "step": 3579 }, { "epoch": 11.63, "learning_rate": 3.810180562389519e-05, "loss": 2.1042, "step": 3582 }, { "epoch": 11.64, "learning_rate": 3.794379682482965e-05, "loss": 2.1244, "step": 3585 }, { "epoch": 11.65, "learning_rate": 3.7786039590653076e-05, "loss": 2.18, "step": 3588 }, { "epoch": 11.66, "learning_rate": 3.762853456088538e-05, "loss": 2.1475, "step": 3591 }, { "epoch": 11.67, "learning_rate": 3.747128237402409e-05, "loss": 2.0909, "step": 3594 }, { "epoch": 11.68, "learning_rate": 3.7314283667541885e-05, "loss": 2.123, "step": 3597 }, { "epoch": 11.69, "learning_rate": 3.715753907788374e-05, "loss": 2.121, "step": 3600 }, { "epoch": 11.7, "learning_rate": 3.700104924046452e-05, "loss": 2.0345, "step": 3603 }, { "epoch": 11.71, "learning_rate": 3.6844814789666436e-05, "loss": 2.0413, "step": 3606 }, { "epoch": 11.72, "learning_rate": 3.6688836358836386e-05, "loss": 2.1818, "step": 3609 }, { "epoch": 11.73, "learning_rate": 3.6533114580283315e-05, "loss": 2.1043, "step": 3612 }, { "epoch": 11.74, "learning_rate": 3.6377650085275874e-05, "loss": 2.0476, "step": 3615 }, { "epoch": 11.75, "learning_rate": 3.622244350403965e-05, "loss": 2.124, "step": 3618 }, { "epoch": 11.76, "learning_rate": 3.6067495465754666e-05, "loss": 2.0856, "step": 3621 }, { "epoch": 11.77, "learning_rate": 3.591280659855296e-05, "loss": 2.1257, "step": 3624 }, { "epoch": 11.78, "learning_rate": 3.575837752951591e-05, "loss": 2.1757, "step": 3627 }, { "epoch": 11.79, "learning_rate": 3.5604208884671645e-05, "loss": 2.0124, "step": 3630 }, { "epoch": 11.8, "learning_rate": 3.5450301288992596e-05, "loss": 2.0324, "step": 3633 }, { "epoch": 11.81, "learning_rate": 3.529665536639305e-05, "loss": 2.1634, "step": 3636 }, { "epoch": 11.81, "learning_rate": 3.514327173972638e-05, "loss": 2.1465, "step": 3639 }, { "epoch": 11.82, "learning_rate": 3.4990151030782744e-05, "loss": 2.0668, "step": 3642 }, { "epoch": 11.83, "learning_rate": 3.483729386028651e-05, "loss": 2.1991, "step": 3645 }, { "epoch": 11.84, "learning_rate": 3.468470084789359e-05, "loss": 2.0814, "step": 3648 }, { "epoch": 11.85, "learning_rate": 3.4532372612189104e-05, "loss": 2.1976, "step": 3651 }, { "epoch": 11.86, "learning_rate": 3.438030977068487e-05, "loss": 2.1935, "step": 3654 }, { "epoch": 11.87, "learning_rate": 3.422851293981676e-05, "loss": 2.1086, "step": 3657 }, { "epoch": 11.88, "learning_rate": 3.4076982734942296e-05, "loss": 2.1479, "step": 3660 }, { "epoch": 11.89, "learning_rate": 3.392571977033819e-05, "loss": 2.1281, "step": 3663 }, { "epoch": 11.9, "learning_rate": 3.377472465919784e-05, "loss": 2.1517, "step": 3666 }, { "epoch": 11.91, "learning_rate": 3.3623998013628675e-05, "loss": 2.1178, "step": 3669 }, { "epoch": 11.92, "learning_rate": 3.347354044464997e-05, "loss": 2.0988, "step": 3672 }, { "epoch": 11.93, "learning_rate": 3.332335256219012e-05, "loss": 2.2034, "step": 3675 }, { "epoch": 11.94, "learning_rate": 3.317343497508424e-05, "loss": 2.1123, "step": 3678 }, { "epoch": 11.95, "learning_rate": 3.302378829107178e-05, "loss": 2.1258, "step": 3681 }, { "epoch": 11.96, "learning_rate": 3.2874413116794e-05, "loss": 2.1263, "step": 3684 }, { "epoch": 11.97, "learning_rate": 3.2725310057791456e-05, "loss": 2.0565, "step": 3687 }, { "epoch": 11.98, "learning_rate": 3.2576479718501584e-05, "loss": 2.0825, "step": 3690 }, { "epoch": 11.99, "learning_rate": 3.242792270225635e-05, "loss": 2.1262, "step": 3693 }, { "epoch": 12.0, "learning_rate": 3.227963961127961e-05, "loss": 2.0983, "step": 3696 }, { "epoch": 12.0, "eval_accuracy": 0.6422703241176929, "eval_loss": 2.062872886657715, "eval_runtime": 16.2609, "eval_samples_per_second": 135.109, "eval_steps_per_second": 67.585, "step": 3696 }, { "epoch": 12.01, "learning_rate": 3.213163104668485e-05, "loss": 2.0955, "step": 3699 }, { "epoch": 12.02, "learning_rate": 3.19838976084727e-05, "loss": 2.1043, "step": 3702 }, { "epoch": 12.03, "learning_rate": 3.18364398955284e-05, "loss": 2.1251, "step": 3705 }, { "epoch": 12.04, "learning_rate": 3.168925850561943e-05, "loss": 2.1206, "step": 3708 }, { "epoch": 12.05, "learning_rate": 3.154235403539323e-05, "loss": 2.0734, "step": 3711 }, { "epoch": 12.06, "learning_rate": 3.1395727080374505e-05, "loss": 2.178, "step": 3714 }, { "epoch": 12.07, "learning_rate": 3.12493782349631e-05, "loss": 2.0629, "step": 3717 }, { "epoch": 12.08, "learning_rate": 3.110330809243134e-05, "loss": 2.1535, "step": 3720 }, { "epoch": 12.09, "learning_rate": 3.095751724492185e-05, "loss": 2.1331, "step": 3723 }, { "epoch": 12.1, "learning_rate": 3.081200628344494e-05, "loss": 2.0902, "step": 3726 }, { "epoch": 12.11, "learning_rate": 3.066677579787631e-05, "loss": 2.0434, "step": 3729 }, { "epoch": 12.12, "learning_rate": 3.0521826376954755e-05, "loss": 2.1005, "step": 3732 }, { "epoch": 12.13, "learning_rate": 3.0377158608279655e-05, "loss": 2.1764, "step": 3735 }, { "epoch": 12.14, "learning_rate": 3.0232773078308517e-05, "loss": 2.1099, "step": 3738 }, { "epoch": 12.15, "learning_rate": 3.0088670372354877e-05, "loss": 2.1211, "step": 3741 }, { "epoch": 12.16, "learning_rate": 2.99448510745856e-05, "loss": 2.1546, "step": 3744 }, { "epoch": 12.17, "learning_rate": 2.9801315768018688e-05, "loss": 2.1664, "step": 3747 }, { "epoch": 12.18, "learning_rate": 2.9658065034520978e-05, "loss": 2.0983, "step": 3750 }, { "epoch": 12.19, "learning_rate": 2.9515099454805663e-05, "loss": 2.0519, "step": 3753 }, { "epoch": 12.19, "learning_rate": 2.93724196084299e-05, "loss": 2.1333, "step": 3756 }, { "epoch": 12.2, "learning_rate": 2.923002607379265e-05, "loss": 2.0304, "step": 3759 }, { "epoch": 12.21, "learning_rate": 2.9087919428132114e-05, "loss": 2.1549, "step": 3762 }, { "epoch": 12.22, "learning_rate": 2.8946100247523533e-05, "loss": 2.1191, "step": 3765 }, { "epoch": 12.23, "learning_rate": 2.8804569106876832e-05, "loss": 2.1154, "step": 3768 }, { "epoch": 12.24, "learning_rate": 2.8663326579934292e-05, "loss": 2.175, "step": 3771 }, { "epoch": 12.25, "learning_rate": 2.8522373239268152e-05, "loss": 2.1378, "step": 3774 }, { "epoch": 12.26, "learning_rate": 2.8381709656278333e-05, "loss": 2.0927, "step": 3777 }, { "epoch": 12.27, "learning_rate": 2.8241336401190222e-05, "loss": 2.1146, "step": 3780 }, { "epoch": 12.28, "learning_rate": 2.810125404305216e-05, "loss": 2.0147, "step": 3783 }, { "epoch": 12.29, "learning_rate": 2.796146314973325e-05, "loss": 2.1068, "step": 3786 }, { "epoch": 12.3, "learning_rate": 2.7821964287921197e-05, "loss": 2.1693, "step": 3789 }, { "epoch": 12.31, "learning_rate": 2.7682758023119694e-05, "loss": 2.1336, "step": 3792 }, { "epoch": 12.32, "learning_rate": 2.7543844919646323e-05, "loss": 2.0793, "step": 3795 }, { "epoch": 12.33, "learning_rate": 2.740522554063033e-05, "loss": 2.0712, "step": 3798 }, { "epoch": 12.34, "learning_rate": 2.726690044801018e-05, "loss": 2.0706, "step": 3801 }, { "epoch": 12.35, "learning_rate": 2.7128870202531343e-05, "loss": 2.0728, "step": 3804 }, { "epoch": 12.36, "learning_rate": 2.6991135363744068e-05, "loss": 2.1108, "step": 3807 }, { "epoch": 12.37, "learning_rate": 2.6853696490001112e-05, "loss": 2.104, "step": 3810 }, { "epoch": 12.38, "learning_rate": 2.6716554138455353e-05, "loss": 2.0752, "step": 3813 }, { "epoch": 12.39, "learning_rate": 2.6579708865057694e-05, "loss": 2.154, "step": 3816 }, { "epoch": 12.4, "learning_rate": 2.6443161224554704e-05, "loss": 2.0717, "step": 3819 }, { "epoch": 12.41, "learning_rate": 2.6306911770486353e-05, "loss": 2.1225, "step": 3822 }, { "epoch": 12.42, "learning_rate": 2.6170961055183906e-05, "loss": 2.1377, "step": 3825 }, { "epoch": 12.43, "learning_rate": 2.6035309629767603e-05, "loss": 2.1614, "step": 3828 }, { "epoch": 12.44, "learning_rate": 2.5899958044144302e-05, "loss": 2.1486, "step": 3831 }, { "epoch": 12.45, "learning_rate": 2.576490684700542e-05, "loss": 2.1206, "step": 3834 }, { "epoch": 12.46, "learning_rate": 2.5630156585824727e-05, "loss": 2.0882, "step": 3837 }, { "epoch": 12.47, "learning_rate": 2.5495707806855938e-05, "loss": 2.1787, "step": 3840 }, { "epoch": 12.48, "learning_rate": 2.536156105513062e-05, "loss": 2.0932, "step": 3843 }, { "epoch": 12.49, "learning_rate": 2.522771687445612e-05, "loss": 2.1471, "step": 3846 }, { "epoch": 12.5, "learning_rate": 2.5094175807413055e-05, "loss": 2.1226, "step": 3849 }, { "epoch": 12.51, "learning_rate": 2.4960938395353296e-05, "loss": 2.1666, "step": 3852 }, { "epoch": 12.52, "learning_rate": 2.4828005178397838e-05, "loss": 2.0437, "step": 3855 }, { "epoch": 12.53, "learning_rate": 2.4695376695434448e-05, "loss": 2.0396, "step": 3858 }, { "epoch": 12.54, "learning_rate": 2.456305348411554e-05, "loss": 2.09, "step": 3861 }, { "epoch": 12.55, "learning_rate": 2.4431036080856073e-05, "loss": 2.0419, "step": 3864 }, { "epoch": 12.56, "learning_rate": 2.429932502083132e-05, "loss": 2.0626, "step": 3867 }, { "epoch": 12.56, "learning_rate": 2.41679208379746e-05, "loss": 2.1798, "step": 3870 }, { "epoch": 12.57, "learning_rate": 2.4036824064975317e-05, "loss": 2.1082, "step": 3873 }, { "epoch": 12.58, "learning_rate": 2.3906035233276614e-05, "loss": 2.0504, "step": 3876 }, { "epoch": 12.59, "learning_rate": 2.3775554873073292e-05, "loss": 2.0439, "step": 3879 }, { "epoch": 12.6, "learning_rate": 2.3645383513309704e-05, "loss": 2.1104, "step": 3882 }, { "epoch": 12.61, "learning_rate": 2.351552168167761e-05, "loss": 2.088, "step": 3885 }, { "epoch": 12.62, "learning_rate": 2.338596990461388e-05, "loss": 2.0038, "step": 3888 }, { "epoch": 12.63, "learning_rate": 2.3256728707298546e-05, "loss": 2.043, "step": 3891 }, { "epoch": 12.64, "learning_rate": 2.312779861365263e-05, "loss": 2.0785, "step": 3894 }, { "epoch": 12.65, "learning_rate": 2.299918014633592e-05, "loss": 2.1406, "step": 3897 }, { "epoch": 12.66, "learning_rate": 2.2870873826744988e-05, "loss": 2.1155, "step": 3900 }, { "epoch": 12.67, "learning_rate": 2.2742880175011028e-05, "loss": 2.1258, "step": 3903 }, { "epoch": 12.68, "learning_rate": 2.261519970999768e-05, "loss": 2.1664, "step": 3906 }, { "epoch": 12.69, "learning_rate": 2.248783294929897e-05, "loss": 2.0733, "step": 3909 }, { "epoch": 12.7, "learning_rate": 2.2360780409237294e-05, "loss": 2.135, "step": 3912 }, { "epoch": 12.71, "learning_rate": 2.2234042604861182e-05, "loss": 2.1826, "step": 3915 }, { "epoch": 12.72, "learning_rate": 2.2107620049943346e-05, "loss": 2.0611, "step": 3918 }, { "epoch": 12.73, "learning_rate": 2.1981513256978458e-05, "loss": 2.0883, "step": 3921 }, { "epoch": 12.74, "learning_rate": 2.185572273718124e-05, "loss": 2.0715, "step": 3924 }, { "epoch": 12.75, "learning_rate": 2.1730249000484203e-05, "loss": 2.0608, "step": 3927 }, { "epoch": 12.76, "learning_rate": 2.1605092555535712e-05, "loss": 2.0845, "step": 3930 }, { "epoch": 12.77, "learning_rate": 2.14802539096979e-05, "loss": 2.0893, "step": 3933 }, { "epoch": 12.78, "learning_rate": 2.1355733569044635e-05, "loss": 2.0895, "step": 3936 }, { "epoch": 12.79, "learning_rate": 2.1231532038359326e-05, "loss": 2.1653, "step": 3939 }, { "epoch": 12.8, "learning_rate": 2.11076498211331e-05, "loss": 2.0523, "step": 3942 }, { "epoch": 12.81, "learning_rate": 2.098408741956256e-05, "loss": 2.056, "step": 3945 }, { "epoch": 12.82, "learning_rate": 2.086084533454784e-05, "loss": 2.1268, "step": 3948 }, { "epoch": 12.83, "learning_rate": 2.0737924065690606e-05, "loss": 2.0818, "step": 3951 }, { "epoch": 12.84, "learning_rate": 2.0615324111292013e-05, "loss": 2.038, "step": 3954 }, { "epoch": 12.85, "learning_rate": 2.0493045968350567e-05, "loss": 2.124, "step": 3957 }, { "epoch": 12.86, "learning_rate": 2.0371090132560322e-05, "loss": 2.0462, "step": 3960 }, { "epoch": 12.87, "learning_rate": 2.0249457098308665e-05, "loss": 2.1401, "step": 3963 }, { "epoch": 12.88, "learning_rate": 2.012814735867442e-05, "loss": 2.1156, "step": 3966 }, { "epoch": 12.89, "learning_rate": 2.0007161405425866e-05, "loss": 2.1056, "step": 3969 }, { "epoch": 12.9, "learning_rate": 1.9886499729018737e-05, "loss": 2.093, "step": 3972 }, { "epoch": 12.91, "learning_rate": 1.9766162818594114e-05, "loss": 2.1043, "step": 3975 }, { "epoch": 12.92, "learning_rate": 1.9646151161976556e-05, "loss": 2.0714, "step": 3978 }, { "epoch": 12.93, "learning_rate": 1.9526465245672187e-05, "loss": 2.1165, "step": 3981 }, { "epoch": 12.94, "learning_rate": 1.9407105554866557e-05, "loss": 2.0959, "step": 3984 }, { "epoch": 12.94, "learning_rate": 1.92880725734227e-05, "loss": 2.1551, "step": 3987 }, { "epoch": 12.95, "learning_rate": 1.9169366783879428e-05, "loss": 2.0905, "step": 3990 }, { "epoch": 12.96, "learning_rate": 1.9050988667448977e-05, "loss": 2.1042, "step": 3993 }, { "epoch": 12.97, "learning_rate": 1.8932938704015314e-05, "loss": 2.1607, "step": 3996 }, { "epoch": 12.98, "learning_rate": 1.8815217372132198e-05, "loss": 2.0262, "step": 3999 }, { "epoch": 12.99, "learning_rate": 1.8697825149021086e-05, "loss": 2.1215, "step": 4002 }, { "epoch": 13.0, "eval_accuracy": 0.6475752433152033, "eval_loss": 2.025885820388794, "eval_runtime": 16.3422, "eval_samples_per_second": 134.437, "eval_steps_per_second": 67.249, "step": 4004 }, { "epoch": 13.0, "learning_rate": 1.8580762510569295e-05, "loss": 2.1667, "step": 4005 }, { "epoch": 13.01, "learning_rate": 1.846402993132811e-05, "loss": 2.1035, "step": 4008 }, { "epoch": 13.02, "learning_rate": 1.8347627884510832e-05, "loss": 2.1239, "step": 4011 }, { "epoch": 13.03, "learning_rate": 1.823155684199074e-05, "loss": 2.1166, "step": 4014 }, { "epoch": 13.04, "learning_rate": 1.8115817274299396e-05, "loss": 2.1101, "step": 4017 }, { "epoch": 13.05, "learning_rate": 1.800040965062455e-05, "loss": 2.0607, "step": 4020 }, { "epoch": 13.06, "learning_rate": 1.7885334438808287e-05, "loss": 2.0964, "step": 4023 }, { "epoch": 13.07, "learning_rate": 1.777059210534524e-05, "loss": 2.1132, "step": 4026 }, { "epoch": 13.08, "learning_rate": 1.7656183115380577e-05, "loss": 2.0793, "step": 4029 }, { "epoch": 13.09, "learning_rate": 1.754210793270812e-05, "loss": 2.1091, "step": 4032 }, { "epoch": 13.1, "learning_rate": 1.742836701976849e-05, "loss": 2.0793, "step": 4035 }, { "epoch": 13.11, "learning_rate": 1.7314960837647297e-05, "loss": 2.0164, "step": 4038 }, { "epoch": 13.12, "learning_rate": 1.7201889846073183e-05, "loss": 2.0697, "step": 4041 }, { "epoch": 13.13, "learning_rate": 1.7089154503415895e-05, "loss": 2.0631, "step": 4044 }, { "epoch": 13.14, "learning_rate": 1.697675526668473e-05, "loss": 2.0714, "step": 4047 }, { "epoch": 13.15, "learning_rate": 1.6864692591526278e-05, "loss": 2.1084, "step": 4050 }, { "epoch": 13.16, "learning_rate": 1.6752966932222826e-05, "loss": 2.1333, "step": 4053 }, { "epoch": 13.17, "learning_rate": 1.664157874169049e-05, "loss": 2.1206, "step": 4056 }, { "epoch": 13.18, "learning_rate": 1.6530528471477326e-05, "loss": 2.1727, "step": 4059 }, { "epoch": 13.19, "learning_rate": 1.6419816571761482e-05, "loss": 2.1396, "step": 4062 }, { "epoch": 13.2, "learning_rate": 1.6309443491349475e-05, "loss": 2.0632, "step": 4065 }, { "epoch": 13.21, "learning_rate": 1.6199409677674314e-05, "loss": 2.0968, "step": 4068 }, { "epoch": 13.22, "learning_rate": 1.6089715576793584e-05, "loss": 2.0538, "step": 4071 }, { "epoch": 13.23, "learning_rate": 1.5980361633387853e-05, "loss": 2.1114, "step": 4074 }, { "epoch": 13.24, "learning_rate": 1.587134829075867e-05, "loss": 2.1656, "step": 4077 }, { "epoch": 13.25, "learning_rate": 1.576267599082686e-05, "loss": 2.0781, "step": 4080 }, { "epoch": 13.26, "learning_rate": 1.5654345174130756e-05, "loss": 2.1749, "step": 4083 }, { "epoch": 13.27, "learning_rate": 1.5546356279824382e-05, "loss": 2.0654, "step": 4086 }, { "epoch": 13.28, "learning_rate": 1.5438709745675606e-05, "loss": 2.0904, "step": 4089 }, { "epoch": 13.29, "learning_rate": 1.5331406008064475e-05, "loss": 2.0368, "step": 4092 }, { "epoch": 13.3, "learning_rate": 1.522444550198141e-05, "loss": 2.0759, "step": 4095 }, { "epoch": 13.31, "learning_rate": 1.511782866102539e-05, "loss": 2.0462, "step": 4098 }, { "epoch": 13.31, "learning_rate": 1.5011555917402265e-05, "loss": 2.0873, "step": 4101 }, { "epoch": 13.32, "learning_rate": 1.4905627701923009e-05, "loss": 2.0913, "step": 4104 }, { "epoch": 13.33, "learning_rate": 1.480004444400187e-05, "loss": 2.0516, "step": 4107 }, { "epoch": 13.34, "learning_rate": 1.4694806571654696e-05, "loss": 2.1136, "step": 4110 }, { "epoch": 13.35, "learning_rate": 1.4589914511497305e-05, "loss": 2.1294, "step": 4113 }, { "epoch": 13.36, "learning_rate": 1.4485368688743527e-05, "loss": 2.068, "step": 4116 }, { "epoch": 13.37, "learning_rate": 1.4381169527203719e-05, "loss": 2.0402, "step": 4119 }, { "epoch": 13.38, "learning_rate": 1.4277317449282834e-05, "loss": 2.048, "step": 4122 }, { "epoch": 13.39, "learning_rate": 1.4173812875978886e-05, "loss": 2.0875, "step": 4125 }, { "epoch": 13.4, "learning_rate": 1.407065622688113e-05, "loss": 2.1008, "step": 4128 }, { "epoch": 13.41, "learning_rate": 1.3967847920168386e-05, "loss": 2.1113, "step": 4131 }, { "epoch": 13.42, "learning_rate": 1.386538837260738e-05, "loss": 2.0277, "step": 4134 }, { "epoch": 13.43, "learning_rate": 1.376327799955105e-05, "loss": 2.1696, "step": 4137 }, { "epoch": 13.44, "learning_rate": 1.3661517214936782e-05, "loss": 2.1531, "step": 4140 }, { "epoch": 13.45, "learning_rate": 1.356010643128487e-05, "loss": 2.1222, "step": 4143 }, { "epoch": 13.46, "learning_rate": 1.345904605969669e-05, "loss": 2.0299, "step": 4146 }, { "epoch": 13.47, "learning_rate": 1.3358336509853131e-05, "loss": 2.1065, "step": 4149 }, { "epoch": 13.48, "learning_rate": 1.3257978190012931e-05, "loss": 2.0989, "step": 4152 }, { "epoch": 13.49, "learning_rate": 1.3157971507011036e-05, "loss": 2.0679, "step": 4155 }, { "epoch": 13.5, "learning_rate": 1.3058316866256826e-05, "loss": 2.1828, "step": 4158 }, { "epoch": 13.51, "learning_rate": 1.295901467173265e-05, "loss": 2.057, "step": 4161 }, { "epoch": 13.52, "learning_rate": 1.2860065325992066e-05, "loss": 2.0964, "step": 4164 }, { "epoch": 13.53, "learning_rate": 1.2761469230158208e-05, "loss": 2.1366, "step": 4167 }, { "epoch": 13.54, "learning_rate": 1.2663226783922266e-05, "loss": 2.0889, "step": 4170 }, { "epoch": 13.55, "learning_rate": 1.2565338385541792e-05, "loss": 2.0918, "step": 4173 }, { "epoch": 13.56, "learning_rate": 1.2467804431839037e-05, "loss": 2.1852, "step": 4176 }, { "epoch": 13.57, "learning_rate": 1.2370625318199414e-05, "loss": 2.0561, "step": 4179 }, { "epoch": 13.58, "learning_rate": 1.2273801438569932e-05, "loss": 2.0864, "step": 4182 }, { "epoch": 13.59, "learning_rate": 1.2177333185457474e-05, "loss": 2.125, "step": 4185 }, { "epoch": 13.6, "learning_rate": 1.2081220949927252e-05, "loss": 2.0829, "step": 4188 }, { "epoch": 13.61, "learning_rate": 1.1985465121601392e-05, "loss": 2.0291, "step": 4191 }, { "epoch": 13.62, "learning_rate": 1.189006608865707e-05, "loss": 2.1237, "step": 4194 }, { "epoch": 13.63, "learning_rate": 1.1795024237825092e-05, "loss": 2.1423, "step": 4197 }, { "epoch": 13.64, "learning_rate": 1.1700339954388384e-05, "loss": 2.1, "step": 4200 }, { "epoch": 13.65, "learning_rate": 1.1606013622180278e-05, "loss": 2.0367, "step": 4203 }, { "epoch": 13.66, "learning_rate": 1.1512045623583068e-05, "loss": 2.0967, "step": 4206 }, { "epoch": 13.67, "learning_rate": 1.1418436339526429e-05, "loss": 2.0585, "step": 4209 }, { "epoch": 13.68, "learning_rate": 1.1325186149485889e-05, "loss": 2.1754, "step": 4212 }, { "epoch": 13.69, "learning_rate": 1.1232295431481222e-05, "loss": 2.0563, "step": 4215 }, { "epoch": 13.69, "learning_rate": 1.1139764562075017e-05, "loss": 2.1228, "step": 4218 }, { "epoch": 13.7, "learning_rate": 1.104759391637108e-05, "loss": 2.013, "step": 4221 }, { "epoch": 13.71, "learning_rate": 1.0955783868012892e-05, "loss": 2.1053, "step": 4224 }, { "epoch": 13.72, "learning_rate": 1.0864334789182218e-05, "loss": 2.0723, "step": 4227 }, { "epoch": 13.73, "learning_rate": 1.0773247050597468e-05, "loss": 2.142, "step": 4230 }, { "epoch": 13.74, "learning_rate": 1.0682521021512249e-05, "loss": 2.0928, "step": 4233 }, { "epoch": 13.75, "learning_rate": 1.0592157069713826e-05, "loss": 2.0371, "step": 4236 }, { "epoch": 13.76, "learning_rate": 1.0502155561521766e-05, "loss": 2.1179, "step": 4239 }, { "epoch": 13.77, "learning_rate": 1.0412516861786236e-05, "loss": 2.0816, "step": 4242 }, { "epoch": 13.78, "learning_rate": 1.032324133388668e-05, "loss": 2.0207, "step": 4245 }, { "epoch": 13.79, "learning_rate": 1.0234329339730398e-05, "loss": 2.0805, "step": 4248 }, { "epoch": 13.8, "learning_rate": 1.0145781239750863e-05, "loss": 2.1022, "step": 4251 }, { "epoch": 13.81, "learning_rate": 1.0057597392906414e-05, "loss": 2.1438, "step": 4254 }, { "epoch": 13.82, "learning_rate": 9.969778156678854e-06, "loss": 2.097, "step": 4257 }, { "epoch": 13.83, "learning_rate": 9.88232388707182e-06, "loss": 2.0942, "step": 4260 }, { "epoch": 13.84, "learning_rate": 9.795234938609466e-06, "loss": 2.0325, "step": 4263 }, { "epoch": 13.85, "learning_rate": 9.708511664335029e-06, "loss": 2.1505, "step": 4266 }, { "epoch": 13.86, "learning_rate": 9.62215441580936e-06, "loss": 2.05, "step": 4269 }, { "epoch": 13.87, "learning_rate": 9.536163543109488e-06, "loss": 2.0526, "step": 4272 }, { "epoch": 13.88, "learning_rate": 9.450539394827185e-06, "loss": 2.0956, "step": 4275 }, { "epoch": 13.89, "learning_rate": 9.365282318067681e-06, "loss": 2.0701, "step": 4278 }, { "epoch": 13.9, "learning_rate": 9.280392658448078e-06, "loss": 2.1114, "step": 4281 }, { "epoch": 13.91, "learning_rate": 9.19587076009607e-06, "loss": 2.0833, "step": 4284 }, { "epoch": 13.92, "learning_rate": 9.11171696564853e-06, "loss": 2.1273, "step": 4287 }, { "epoch": 13.93, "learning_rate": 9.027931616250063e-06, "loss": 2.0479, "step": 4290 }, { "epoch": 13.94, "learning_rate": 8.94451505155165e-06, "loss": 2.0574, "step": 4293 }, { "epoch": 13.95, "learning_rate": 8.861467609709373e-06, "loss": 2.0643, "step": 4296 }, { "epoch": 13.96, "learning_rate": 8.778789627382833e-06, "loss": 2.1623, "step": 4299 }, { "epoch": 13.97, "learning_rate": 8.696481439734017e-06, "loss": 2.0858, "step": 4302 }, { "epoch": 13.98, "learning_rate": 8.614543380425766e-06, "loss": 2.1034, "step": 4305 }, { "epoch": 13.99, "learning_rate": 8.532975781620512e-06, "loss": 2.1097, "step": 4308 }, { "epoch": 14.0, "learning_rate": 8.451778973978874e-06, "loss": 2.1255, "step": 4311 }, { "epoch": 14.0, "eval_accuracy": 0.6460723583804651, "eval_loss": 2.037827730178833, "eval_runtime": 16.2998, "eval_samples_per_second": 134.787, "eval_steps_per_second": 67.424, "step": 4312 }, { "epoch": 14.01, "learning_rate": 8.370953286658389e-06, "loss": 2.0412, "step": 4314 }, { "epoch": 14.02, "learning_rate": 8.290499047312106e-06, "loss": 2.1136, "step": 4317 }, { "epoch": 14.03, "learning_rate": 8.210416582087332e-06, "loss": 2.1369, "step": 4320 }, { "epoch": 14.04, "learning_rate": 8.130706215624195e-06, "loss": 2.0917, "step": 4323 }, { "epoch": 14.05, "learning_rate": 8.051368271054493e-06, "loss": 2.1272, "step": 4326 }, { "epoch": 14.06, "learning_rate": 7.972403070000222e-06, "loss": 2.1439, "step": 4329 }, { "epoch": 14.06, "learning_rate": 7.893810932572333e-06, "loss": 2.0715, "step": 4332 }, { "epoch": 14.07, "learning_rate": 7.815592177369502e-06, "loss": 2.0255, "step": 4335 }, { "epoch": 14.08, "learning_rate": 7.737747121476757e-06, "loss": 2.0631, "step": 4338 }, { "epoch": 14.09, "learning_rate": 7.66027608046419e-06, "loss": 2.1339, "step": 4341 }, { "epoch": 14.1, "learning_rate": 7.58317936838574e-06, "loss": 2.0685, "step": 4344 }, { "epoch": 14.11, "learning_rate": 7.506457297777847e-06, "loss": 2.0141, "step": 4347 }, { "epoch": 14.12, "learning_rate": 7.4301101796582225e-06, "loss": 2.0722, "step": 4350 }, { "epoch": 14.13, "learning_rate": 7.354138323524617e-06, "loss": 2.0758, "step": 4353 }, { "epoch": 14.14, "learning_rate": 7.278542037353542e-06, "loss": 2.0993, "step": 4356 }, { "epoch": 14.15, "learning_rate": 7.203321627598947e-06, "loss": 2.114, "step": 4359 }, { "epoch": 14.16, "learning_rate": 7.128477399191136e-06, "loss": 2.114, "step": 4362 }, { "epoch": 14.17, "learning_rate": 7.054009655535354e-06, "loss": 2.0214, "step": 4365 }, { "epoch": 14.18, "learning_rate": 6.979918698510701e-06, "loss": 2.0729, "step": 4368 }, { "epoch": 14.19, "learning_rate": 6.906204828468821e-06, "loss": 1.9927, "step": 4371 }, { "epoch": 14.2, "learning_rate": 6.832868344232757e-06, "loss": 2.0514, "step": 4374 }, { "epoch": 14.21, "learning_rate": 6.759909543095632e-06, "loss": 2.1031, "step": 4377 }, { "epoch": 14.22, "learning_rate": 6.687328720819552e-06, "loss": 2.0984, "step": 4380 }, { "epoch": 14.23, "learning_rate": 6.615126171634367e-06, "loss": 2.0636, "step": 4383 }, { "epoch": 14.24, "learning_rate": 6.543302188236445e-06, "loss": 2.0285, "step": 4386 }, { "epoch": 14.25, "learning_rate": 6.471857061787501e-06, "loss": 2.0266, "step": 4389 }, { "epoch": 14.26, "learning_rate": 6.400791081913538e-06, "loss": 2.1057, "step": 4392 }, { "epoch": 14.27, "learning_rate": 6.33010453670343e-06, "loss": 2.0328, "step": 4395 }, { "epoch": 14.28, "learning_rate": 6.25979771270796e-06, "loss": 2.0908, "step": 4398 }, { "epoch": 14.29, "learning_rate": 6.189870894938587e-06, "loss": 2.067, "step": 4401 }, { "epoch": 14.3, "learning_rate": 6.120324366866281e-06, "loss": 2.0099, "step": 4404 }, { "epoch": 14.31, "learning_rate": 6.051158410420355e-06, "loss": 2.1048, "step": 4407 }, { "epoch": 14.32, "learning_rate": 5.98237330598741e-06, "loss": 2.0286, "step": 4410 }, { "epoch": 14.33, "learning_rate": 5.91396933241013e-06, "loss": 2.0796, "step": 4413 }, { "epoch": 14.34, "learning_rate": 5.845946766986099e-06, "loss": 2.0391, "step": 4416 }, { "epoch": 14.35, "learning_rate": 5.778305885466828e-06, "loss": 2.0937, "step": 4419 }, { "epoch": 14.36, "learning_rate": 5.711046962056488e-06, "loss": 2.1198, "step": 4422 }, { "epoch": 14.37, "learning_rate": 5.644170269410853e-06, "loss": 2.1277, "step": 4425 }, { "epoch": 14.38, "learning_rate": 5.577676078636251e-06, "loss": 2.1039, "step": 4428 }, { "epoch": 14.39, "learning_rate": 5.511564659288404e-06, "loss": 2.0803, "step": 4431 }, { "epoch": 14.4, "learning_rate": 5.445836279371308e-06, "loss": 2.0594, "step": 4434 }, { "epoch": 14.41, "learning_rate": 5.380491205336202e-06, "loss": 2.131, "step": 4437 }, { "epoch": 14.42, "learning_rate": 5.315529702080491e-06, "loss": 2.0799, "step": 4440 }, { "epoch": 14.43, "learning_rate": 5.250952032946643e-06, "loss": 2.0678, "step": 4443 }, { "epoch": 14.44, "learning_rate": 5.186758459721075e-06, "loss": 2.115, "step": 4446 }, { "epoch": 14.44, "learning_rate": 5.122949242633279e-06, "loss": 2.0485, "step": 4449 }, { "epoch": 14.45, "learning_rate": 5.059524640354496e-06, "loss": 2.1301, "step": 4452 }, { "epoch": 14.46, "learning_rate": 4.996484909996868e-06, "loss": 2.0767, "step": 4455 }, { "epoch": 14.47, "learning_rate": 4.933830307112353e-06, "loss": 2.1214, "step": 4458 }, { "epoch": 14.48, "learning_rate": 4.871561085691634e-06, "loss": 2.0206, "step": 4461 }, { "epoch": 14.49, "learning_rate": 4.8096774981631235e-06, "loss": 2.1629, "step": 4464 }, { "epoch": 14.5, "learning_rate": 4.7481797953919605e-06, "loss": 2.0928, "step": 4467 }, { "epoch": 14.51, "learning_rate": 4.687068226679004e-06, "loss": 2.1403, "step": 4470 }, { "epoch": 14.52, "learning_rate": 4.6263430397597395e-06, "loss": 2.0604, "step": 4473 }, { "epoch": 14.53, "learning_rate": 4.566004480803332e-06, "loss": 2.1267, "step": 4476 }, { "epoch": 14.54, "learning_rate": 4.5060527944116856e-06, "loss": 2.1578, "step": 4479 }, { "epoch": 14.55, "learning_rate": 4.446488223618306e-06, "loss": 2.1332, "step": 4482 }, { "epoch": 14.56, "learning_rate": 4.387311009887463e-06, "loss": 1.9903, "step": 4485 }, { "epoch": 14.57, "learning_rate": 4.328521393113149e-06, "loss": 2.2028, "step": 4488 }, { "epoch": 14.58, "learning_rate": 4.270119611618073e-06, "loss": 2.0242, "step": 4491 }, { "epoch": 14.59, "learning_rate": 4.21210590215273e-06, "loss": 2.0528, "step": 4494 }, { "epoch": 14.6, "learning_rate": 4.1544804998944756e-06, "loss": 2.1233, "step": 4497 }, { "epoch": 14.61, "learning_rate": 4.097243638446502e-06, "loss": 2.1108, "step": 4500 }, { "epoch": 14.62, "learning_rate": 4.040395549836928e-06, "loss": 2.0646, "step": 4503 }, { "epoch": 14.63, "learning_rate": 3.983936464517901e-06, "loss": 2.1225, "step": 4506 }, { "epoch": 14.64, "learning_rate": 3.9278666113645615e-06, "loss": 2.0553, "step": 4509 }, { "epoch": 14.65, "learning_rate": 3.872186217674167e-06, "loss": 2.1065, "step": 4512 }, { "epoch": 14.66, "learning_rate": 3.816895509165252e-06, "loss": 2.0563, "step": 4515 }, { "epoch": 14.67, "learning_rate": 3.7619947099765353e-06, "loss": 2.1165, "step": 4518 }, { "epoch": 14.68, "learning_rate": 3.707484042666198e-06, "loss": 2.0359, "step": 4521 }, { "epoch": 14.69, "learning_rate": 3.6533637282108347e-06, "loss": 2.0946, "step": 4524 }, { "epoch": 14.7, "learning_rate": 3.599633986004669e-06, "loss": 2.0738, "step": 4527 }, { "epoch": 14.71, "learning_rate": 3.5462950338585597e-06, "loss": 2.0815, "step": 4530 }, { "epoch": 14.72, "learning_rate": 3.4933470879992104e-06, "loss": 2.05, "step": 4533 }, { "epoch": 14.73, "learning_rate": 3.440790363068247e-06, "loss": 2.0606, "step": 4536 }, { "epoch": 14.74, "learning_rate": 3.3886250721213544e-06, "loss": 2.1675, "step": 4539 }, { "epoch": 14.75, "learning_rate": 3.3368514266273964e-06, "loss": 2.0499, "step": 4542 }, { "epoch": 14.76, "learning_rate": 3.2854696364675974e-06, "loss": 2.1578, "step": 4545 }, { "epoch": 14.77, "learning_rate": 3.2344799099346733e-06, "loss": 2.0859, "step": 4548 }, { "epoch": 14.78, "learning_rate": 3.1838824537319456e-06, "loss": 2.1324, "step": 4551 }, { "epoch": 14.79, "learning_rate": 3.1336774729725736e-06, "loss": 2.117, "step": 4554 }, { "epoch": 14.8, "learning_rate": 3.0838651711787013e-06, "loss": 2.0503, "step": 4557 }, { "epoch": 14.81, "learning_rate": 3.034445750280579e-06, "loss": 2.0449, "step": 4560 }, { "epoch": 14.81, "learning_rate": 2.985419410615831e-06, "loss": 2.1285, "step": 4563 }, { "epoch": 14.82, "learning_rate": 2.9367863509285775e-06, "loss": 2.0391, "step": 4566 }, { "epoch": 14.83, "learning_rate": 2.8885467683686497e-06, "loss": 2.0469, "step": 4569 }, { "epoch": 14.84, "learning_rate": 2.840700858490786e-06, "loss": 2.1386, "step": 4572 }, { "epoch": 14.85, "learning_rate": 2.7932488152538794e-06, "loss": 2.1428, "step": 4575 }, { "epoch": 14.86, "learning_rate": 2.7461908310201123e-06, "loss": 2.0901, "step": 4578 }, { "epoch": 14.87, "learning_rate": 2.6995270965542554e-06, "loss": 2.0583, "step": 4581 }, { "epoch": 14.88, "learning_rate": 2.653257801022835e-06, "loss": 2.0573, "step": 4584 }, { "epoch": 14.89, "learning_rate": 2.607383131993424e-06, "loss": 2.035, "step": 4587 }, { "epoch": 14.9, "learning_rate": 2.561903275433797e-06, "loss": 2.1873, "step": 4590 }, { "epoch": 14.91, "learning_rate": 2.5168184157113084e-06, "loss": 2.0505, "step": 4593 }, { "epoch": 14.92, "learning_rate": 2.472128735591983e-06, "loss": 2.0236, "step": 4596 }, { "epoch": 14.93, "learning_rate": 2.4278344162398935e-06, "loss": 2.022, "step": 4599 }, { "epoch": 14.94, "learning_rate": 2.3839356372164056e-06, "loss": 1.9994, "step": 4602 }, { "epoch": 14.95, "learning_rate": 2.3404325764794012e-06, "loss": 1.9757, "step": 4605 }, { "epoch": 14.96, "learning_rate": 2.2973254103826e-06, "loss": 2.0497, "step": 4608 }, { "epoch": 14.97, "learning_rate": 2.254614313674863e-06, "loss": 2.1178, "step": 4611 }, { "epoch": 14.98, "learning_rate": 2.2122994594994227e-06, "loss": 2.1794, "step": 4614 }, { "epoch": 14.99, "learning_rate": 2.1703810193932307e-06, "loss": 2.0673, "step": 4617 }, { "epoch": 15.0, "learning_rate": 2.1288591632862343e-06, "loss": 2.1751, "step": 4620 }, { "epoch": 15.0, "eval_accuracy": 0.6458123953098828, "eval_loss": 2.0256659984588623, "eval_runtime": 16.3182, "eval_samples_per_second": 134.635, "eval_steps_per_second": 67.348, "step": 4620 }, { "epoch": 15.01, "learning_rate": 2.087734059500712e-06, "loss": 2.0922, "step": 4623 }, { "epoch": 15.02, "learning_rate": 2.0470058747505516e-06, "loss": 2.1413, "step": 4626 }, { "epoch": 15.03, "learning_rate": 2.006674774140638e-06, "loss": 2.0214, "step": 4629 }, { "epoch": 15.04, "learning_rate": 1.9667409211661437e-06, "loss": 2.1027, "step": 4632 }, { "epoch": 15.05, "learning_rate": 1.9272044777118524e-06, "loss": 2.0475, "step": 4635 }, { "epoch": 15.06, "learning_rate": 1.8880656040514921e-06, "loss": 2.0842, "step": 4638 }, { "epoch": 15.07, "learning_rate": 1.8493244588471793e-06, "loss": 2.0245, "step": 4641 }, { "epoch": 15.08, "learning_rate": 1.8109811991486646e-06, "loss": 2.0969, "step": 4644 }, { "epoch": 15.09, "learning_rate": 1.7730359803927343e-06, "loss": 2.1304, "step": 4647 }, { "epoch": 15.1, "learning_rate": 1.735488956402631e-06, "loss": 2.001, "step": 4650 }, { "epoch": 15.11, "learning_rate": 1.698340279387356e-06, "loss": 2.1577, "step": 4653 }, { "epoch": 15.12, "learning_rate": 1.6615900999410683e-06, "loss": 2.058, "step": 4656 }, { "epoch": 15.13, "learning_rate": 1.6252385670425307e-06, "loss": 2.0714, "step": 4659 }, { "epoch": 15.14, "learning_rate": 1.589285828054421e-06, "loss": 2.0709, "step": 4662 }, { "epoch": 15.15, "learning_rate": 1.5537320287227764e-06, "loss": 2.0754, "step": 4665 }, { "epoch": 15.16, "learning_rate": 1.5185773131764502e-06, "loss": 2.1037, "step": 4668 }, { "epoch": 15.17, "learning_rate": 1.4838218239264456e-06, "loss": 2.1344, "step": 4671 }, { "epoch": 15.18, "learning_rate": 1.4494657018653823e-06, "loss": 2.0933, "step": 4674 }, { "epoch": 15.19, "learning_rate": 1.4155090862668863e-06, "loss": 2.0771, "step": 4677 }, { "epoch": 15.19, "learning_rate": 1.3819521147851123e-06, "loss": 2.0867, "step": 4680 }, { "epoch": 15.2, "learning_rate": 1.3487949234540664e-06, "loss": 2.1519, "step": 4683 }, { "epoch": 15.21, "learning_rate": 1.3160376466871739e-06, "loss": 2.0861, "step": 4686 }, { "epoch": 15.22, "learning_rate": 1.2836804172766449e-06, "loss": 2.0644, "step": 4689 }, { "epoch": 15.23, "learning_rate": 1.2517233663929651e-06, "loss": 2.1133, "step": 4692 }, { "epoch": 15.24, "learning_rate": 1.2201666235843735e-06, "loss": 2.0873, "step": 4695 }, { "epoch": 15.25, "learning_rate": 1.18901031677634e-06, "loss": 2.1127, "step": 4698 }, { "epoch": 15.26, "learning_rate": 1.1582545722710225e-06, "loss": 2.0917, "step": 4701 }, { "epoch": 15.27, "learning_rate": 1.1278995147467885e-06, "loss": 2.0339, "step": 4704 }, { "epoch": 15.28, "learning_rate": 1.0979452672576718e-06, "loss": 2.0649, "step": 4707 }, { "epoch": 15.29, "learning_rate": 1.0683919512329166e-06, "loss": 2.0376, "step": 4710 }, { "epoch": 15.3, "learning_rate": 1.0392396864764231e-06, "loss": 2.0764, "step": 4713 }, { "epoch": 15.31, "learning_rate": 1.0104885911663474e-06, "loss": 2.1247, "step": 4716 }, { "epoch": 15.32, "learning_rate": 9.821387818545358e-07, "loss": 2.1067, "step": 4719 }, { "epoch": 15.33, "learning_rate": 9.54190373466113e-07, "loss": 2.0267, "step": 4722 }, { "epoch": 15.34, "learning_rate": 9.266434792989942e-07, "loss": 2.0377, "step": 4725 }, { "epoch": 15.35, "learning_rate": 8.994982110234307e-07, "loss": 2.0895, "step": 4728 }, { "epoch": 15.36, "learning_rate": 8.727546786815421e-07, "loss": 2.027, "step": 4731 }, { "epoch": 15.37, "learning_rate": 8.464129906868734e-07, "loss": 2.0593, "step": 4734 }, { "epoch": 15.38, "learning_rate": 8.204732538239835e-07, "loss": 1.9569, "step": 4737 }, { "epoch": 15.39, "learning_rate": 7.949355732479902e-07, "loss": 2.0478, "step": 4740 }, { "epoch": 15.4, "learning_rate": 7.698000524841376e-07, "loss": 2.0132, "step": 4743 }, { "epoch": 15.41, "learning_rate": 7.450667934273958e-07, "loss": 2.1406, "step": 4746 }, { "epoch": 15.42, "learning_rate": 7.207358963420063e-07, "loss": 2.0989, "step": 4749 }, { "epoch": 15.43, "learning_rate": 6.968074598611484e-07, "loss": 2.1065, "step": 4752 }, { "epoch": 15.44, "learning_rate": 6.732815809864734e-07, "loss": 2.0832, "step": 4755 }, { "epoch": 15.45, "learning_rate": 6.501583550877488e-07, "loss": 2.1096, "step": 4758 }, { "epoch": 15.46, "learning_rate": 6.274378759024257e-07, "loss": 2.0831, "step": 4761 }, { "epoch": 15.47, "learning_rate": 6.051202355353392e-07, "loss": 2.0592, "step": 4764 }, { "epoch": 15.48, "learning_rate": 5.832055244582524e-07, "loss": 2.1122, "step": 4767 }, { "epoch": 15.49, "learning_rate": 5.616938315095243e-07, "loss": 2.1232, "step": 4770 }, { "epoch": 15.5, "learning_rate": 5.405852438937764e-07, "loss": 2.0721, "step": 4773 }, { "epoch": 15.51, "learning_rate": 5.198798471814814e-07, "loss": 2.1295, "step": 4776 }, { "epoch": 15.52, "learning_rate": 4.995777253086753e-07, "loss": 2.0643, "step": 4779 }, { "epoch": 15.53, "learning_rate": 4.796789605765573e-07, "loss": 2.0741, "step": 4782 }, { "epoch": 15.54, "learning_rate": 4.601836336512233e-07, "loss": 2.0587, "step": 4785 }, { "epoch": 15.55, "learning_rate": 4.4109182356327774e-07, "loss": 2.1124, "step": 4788 }, { "epoch": 15.56, "learning_rate": 4.2240360770753327e-07, "loss": 2.1365, "step": 4791 }, { "epoch": 15.56, "learning_rate": 4.0411906184273376e-07, "loss": 2.1643, "step": 4794 }, { "epoch": 15.57, "learning_rate": 3.8623826009120955e-07, "loss": 2.066, "step": 4797 }, { "epoch": 15.58, "learning_rate": 3.6876127493854495e-07, "loss": 2.0562, "step": 4800 }, { "epoch": 15.59, "learning_rate": 3.516881772333669e-07, "loss": 2.1947, "step": 4803 }, { "epoch": 15.6, "learning_rate": 3.35019036187012e-07, "loss": 2.0512, "step": 4806 }, { "epoch": 15.61, "learning_rate": 3.187539193732048e-07, "loss": 2.0424, "step": 4809 }, { "epoch": 15.62, "learning_rate": 3.028928927278685e-07, "loss": 2.101, "step": 4812 }, { "epoch": 15.63, "learning_rate": 2.874360205488258e-07, "loss": 2.1028, "step": 4815 }, { "epoch": 15.64, "learning_rate": 2.723833654954655e-07, "loss": 2.0677, "step": 4818 }, { "epoch": 15.65, "learning_rate": 2.577349885886315e-07, "loss": 2.0715, "step": 4821 }, { "epoch": 15.66, "learning_rate": 2.434909492102455e-07, "loss": 2.106, "step": 4824 }, { "epoch": 15.67, "learning_rate": 2.2965130510310685e-07, "loss": 2.0889, "step": 4827 }, { "epoch": 15.68, "learning_rate": 2.1621611237071516e-07, "loss": 2.0497, "step": 4830 }, { "epoch": 15.69, "learning_rate": 2.031854254769594e-07, "loss": 2.0797, "step": 4833 }, { "epoch": 15.7, "learning_rate": 1.9055929724595134e-07, "loss": 2.0286, "step": 4836 }, { "epoch": 15.71, "learning_rate": 1.7833777886175907e-07, "loss": 1.9975, "step": 4839 }, { "epoch": 15.72, "learning_rate": 1.66520919868296e-07, "loss": 2.0781, "step": 4842 }, { "epoch": 15.73, "learning_rate": 1.5510876816898778e-07, "loss": 2.1324, "step": 4845 }, { "epoch": 15.74, "learning_rate": 1.4410137002670575e-07, "loss": 2.0367, "step": 4848 }, { "epoch": 15.75, "learning_rate": 1.334987700634893e-07, "loss": 2.1207, "step": 4851 }, { "epoch": 15.76, "learning_rate": 1.233010112604016e-07, "loss": 2.1331, "step": 4854 }, { "epoch": 15.77, "learning_rate": 1.1350813495737411e-07, "loss": 2.1653, "step": 4857 }, { "epoch": 15.78, "learning_rate": 1.0412018085297348e-07, "loss": 2.0604, "step": 4860 }, { "epoch": 15.79, "learning_rate": 9.513718700432384e-08, "loss": 2.0798, "step": 4863 }, { "epoch": 15.8, "learning_rate": 8.655918982689581e-08, "loss": 2.04, "step": 4866 }, { "epoch": 15.81, "learning_rate": 7.838622409436225e-08, "loss": 2.017, "step": 4869 }, { "epoch": 15.82, "learning_rate": 7.061832293849823e-08, "loss": 2.0456, "step": 4872 }, { "epoch": 15.83, "learning_rate": 6.325551784900352e-08, "loss": 2.1483, "step": 4875 }, { "epoch": 15.84, "learning_rate": 5.629783867336924e-08, "loss": 2.0091, "step": 4878 }, { "epoch": 15.85, "learning_rate": 4.9745313616822445e-08, "loss": 2.1577, "step": 4881 }, { "epoch": 15.86, "learning_rate": 4.3597969242126225e-08, "loss": 2.0558, "step": 4884 }, { "epoch": 15.87, "learning_rate": 3.7855830469535334e-08, "loss": 2.0223, "step": 4887 }, { "epoch": 15.88, "learning_rate": 3.2518920576662945e-08, "loss": 2.0815, "step": 4890 }, { "epoch": 15.89, "learning_rate": 2.7587261198414038e-08, "loss": 2.0999, "step": 4893 }, { "epoch": 15.9, "learning_rate": 2.3060872326841066e-08, "loss": 2.1187, "step": 4896 }, { "epoch": 15.91, "learning_rate": 1.8939772311143967e-08, "loss": 2.0901, "step": 4899 }, { "epoch": 15.92, "learning_rate": 1.522397785752583e-08, "loss": 2.1402, "step": 4902 }, { "epoch": 15.93, "learning_rate": 1.1913504029159583e-08, "loss": 2.1058, "step": 4905 }, { "epoch": 15.94, "learning_rate": 9.008364246121393e-09, "loss": 2.0656, "step": 4908 }, { "epoch": 15.94, "learning_rate": 6.508570285346238e-09, "loss": 2.1138, "step": 4911 }, { "epoch": 15.95, "learning_rate": 4.414132280550209e-09, "loss": 2.0542, "step": 4914 }, { "epoch": 15.96, "learning_rate": 2.7250587222082957e-09, "loss": 1.972, "step": 4917 }, { "epoch": 15.97, "learning_rate": 1.4413564575432858e-09, "loss": 2.069, "step": 4920 }, { "epoch": 15.98, "learning_rate": 5.630306904369498e-10, "loss": 2.1005, "step": 4923 }, { "epoch": 15.99, "learning_rate": 9.008498147444755e-11, "loss": 1.9516, "step": 4926 }, { "epoch": 16.0, "eval_accuracy": 0.6450172750488208, "eval_loss": 2.037081241607666, "eval_runtime": 16.3187, "eval_samples_per_second": 134.631, "eval_steps_per_second": 67.346, "step": 4928 }, { "epoch": 16.0, "step": 4928, "total_flos": 1.0508396840353792e+16, "train_loss": 2.4320973860366, "train_runtime": 3195.9564, "train_samples_per_second": 98.68, "train_steps_per_second": 1.542 } ], "max_steps": 4928, "num_train_epochs": 16, "total_flos": 1.0508396840353792e+16, "trial_name": null, "trial_params": null }