{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.557544757033248, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.0000000000000002e-06, "loss": 2.4523, "step": 10 }, { "epoch": 0.05, "learning_rate": 2.0000000000000003e-06, "loss": 2.4396, "step": 20 }, { "epoch": 0.08, "learning_rate": 3e-06, "loss": 2.3918, "step": 30 }, { "epoch": 0.1, "learning_rate": 4.000000000000001e-06, "loss": 2.2748, "step": 40 }, { "epoch": 0.13, "learning_rate": 5e-06, "loss": 2.166, "step": 50 }, { "epoch": 0.15, "learning_rate": 6e-06, "loss": 1.9984, "step": 60 }, { "epoch": 0.18, "learning_rate": 7.000000000000001e-06, "loss": 1.8286, "step": 70 }, { "epoch": 0.2, "learning_rate": 8.000000000000001e-06, "loss": 1.6089, "step": 80 }, { "epoch": 0.23, "learning_rate": 9e-06, "loss": 1.387, "step": 90 }, { "epoch": 0.26, "learning_rate": 1e-05, "loss": 1.1289, "step": 100 }, { "epoch": 0.28, "learning_rate": 1.1000000000000001e-05, "loss": 0.8848, "step": 110 }, { "epoch": 0.31, "learning_rate": 1.2e-05, "loss": 0.6848, "step": 120 }, { "epoch": 0.33, "learning_rate": 1.3000000000000001e-05, "loss": 0.4791, "step": 130 }, { "epoch": 0.36, "learning_rate": 1.4000000000000001e-05, "loss": 0.3629, "step": 140 }, { "epoch": 0.38, "learning_rate": 1.5e-05, "loss": 0.2723, "step": 150 }, { "epoch": 0.41, "learning_rate": 1.6000000000000003e-05, "loss": 0.2076, "step": 160 }, { "epoch": 0.43, "learning_rate": 1.7000000000000003e-05, "loss": 0.1543, "step": 170 }, { "epoch": 0.46, "learning_rate": 1.8e-05, "loss": 0.1494, "step": 180 }, { "epoch": 0.49, "learning_rate": 1.9e-05, "loss": 0.1076, "step": 190 }, { "epoch": 0.51, "learning_rate": 2e-05, "loss": 0.1173, "step": 200 }, { "epoch": 0.54, "learning_rate": 2.1e-05, "loss": 0.1025, "step": 210 }, { "epoch": 0.56, "learning_rate": 2.2000000000000003e-05, "loss": 0.1056, "step": 220 }, { "epoch": 0.59, "learning_rate": 2.3000000000000003e-05, "loss": 0.0841, "step": 230 }, { "epoch": 0.61, "learning_rate": 2.4e-05, "loss": 0.079, "step": 240 }, { "epoch": 0.64, "learning_rate": 2.5e-05, "loss": 0.0764, "step": 250 }, { "epoch": 0.66, "learning_rate": 2.6000000000000002e-05, "loss": 0.0659, "step": 260 }, { "epoch": 0.69, "learning_rate": 2.7000000000000002e-05, "loss": 0.0766, "step": 270 }, { "epoch": 0.72, "learning_rate": 2.8000000000000003e-05, "loss": 0.0729, "step": 280 }, { "epoch": 0.74, "learning_rate": 2.9e-05, "loss": 0.0862, "step": 290 }, { "epoch": 0.77, "learning_rate": 3e-05, "loss": 0.0565, "step": 300 }, { "epoch": 0.79, "learning_rate": 3.1e-05, "loss": 0.0725, "step": 310 }, { "epoch": 0.82, "learning_rate": 3.2000000000000005e-05, "loss": 0.0757, "step": 320 }, { "epoch": 0.84, "learning_rate": 3.3e-05, "loss": 0.0872, "step": 330 }, { "epoch": 0.87, "learning_rate": 3.4000000000000007e-05, "loss": 0.0692, "step": 340 }, { "epoch": 0.9, "learning_rate": 3.5e-05, "loss": 0.0646, "step": 350 }, { "epoch": 0.92, "learning_rate": 3.6e-05, "loss": 0.0517, "step": 360 }, { "epoch": 0.95, "learning_rate": 3.7e-05, "loss": 0.0708, "step": 370 }, { "epoch": 0.97, "learning_rate": 3.8e-05, "loss": 0.0744, "step": 380 }, { "epoch": 1.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.0745, "step": 390 }, { "epoch": 1.02, "learning_rate": 4e-05, "loss": 0.0433, "step": 400 }, { "epoch": 1.05, "learning_rate": 4.1e-05, "loss": 0.0369, "step": 410 }, { "epoch": 1.07, "learning_rate": 4.2e-05, "loss": 0.0347, "step": 420 }, { "epoch": 1.1, "learning_rate": 4.3e-05, "loss": 0.0426, "step": 430 }, { "epoch": 1.13, "learning_rate": 4.4000000000000006e-05, "loss": 0.0254, "step": 440 }, { "epoch": 1.15, "learning_rate": 4.5e-05, "loss": 0.0296, "step": 450 }, { "epoch": 1.18, "learning_rate": 4.600000000000001e-05, "loss": 0.0329, "step": 460 }, { "epoch": 1.2, "learning_rate": 4.7e-05, "loss": 0.0373, "step": 470 }, { "epoch": 1.23, "learning_rate": 4.8e-05, "loss": 0.0185, "step": 480 }, { "epoch": 1.25, "learning_rate": 4.9e-05, "loss": 0.0346, "step": 490 }, { "epoch": 1.28, "learning_rate": 5e-05, "loss": 0.0343, "step": 500 }, { "epoch": 1.3, "learning_rate": 4.9257057949479945e-05, "loss": 0.0398, "step": 510 }, { "epoch": 1.33, "learning_rate": 4.851411589895988e-05, "loss": 0.0266, "step": 520 }, { "epoch": 1.36, "learning_rate": 4.777117384843983e-05, "loss": 0.0394, "step": 530 }, { "epoch": 1.38, "learning_rate": 4.7028231797919764e-05, "loss": 0.0385, "step": 540 }, { "epoch": 1.41, "learning_rate": 4.6285289747399706e-05, "loss": 0.0467, "step": 550 }, { "epoch": 1.43, "learning_rate": 4.554234769687964e-05, "loss": 0.0561, "step": 560 }, { "epoch": 1.46, "learning_rate": 4.479940564635959e-05, "loss": 0.0224, "step": 570 }, { "epoch": 1.48, "learning_rate": 4.4056463595839526e-05, "loss": 0.0279, "step": 580 }, { "epoch": 1.51, "learning_rate": 4.331352154531947e-05, "loss": 0.0336, "step": 590 }, { "epoch": 1.53, "learning_rate": 4.25705794947994e-05, "loss": 0.0327, "step": 600 }, { "epoch": 1.56, "learning_rate": 4.182763744427935e-05, "loss": 0.0284, "step": 610 }, { "epoch": 1.59, "learning_rate": 4.108469539375929e-05, "loss": 0.0317, "step": 620 }, { "epoch": 1.61, "learning_rate": 4.034175334323923e-05, "loss": 0.0429, "step": 630 }, { "epoch": 1.64, "learning_rate": 3.9598811292719165e-05, "loss": 0.0342, "step": 640 }, { "epoch": 1.66, "learning_rate": 3.8855869242199114e-05, "loss": 0.0429, "step": 650 }, { "epoch": 1.69, "learning_rate": 3.811292719167905e-05, "loss": 0.055, "step": 660 }, { "epoch": 1.71, "learning_rate": 3.736998514115899e-05, "loss": 0.039, "step": 670 }, { "epoch": 1.74, "learning_rate": 3.662704309063893e-05, "loss": 0.0303, "step": 680 }, { "epoch": 1.76, "learning_rate": 3.5884101040118876e-05, "loss": 0.0299, "step": 690 }, { "epoch": 1.79, "learning_rate": 3.514115898959881e-05, "loss": 0.0331, "step": 700 }, { "epoch": 1.82, "learning_rate": 3.439821693907875e-05, "loss": 0.0287, "step": 710 }, { "epoch": 1.84, "learning_rate": 3.3655274888558695e-05, "loss": 0.0298, "step": 720 }, { "epoch": 1.87, "learning_rate": 3.291233283803864e-05, "loss": 0.0206, "step": 730 }, { "epoch": 1.89, "learning_rate": 3.216939078751858e-05, "loss": 0.0283, "step": 740 }, { "epoch": 1.92, "learning_rate": 3.1426448736998515e-05, "loss": 0.0359, "step": 750 }, { "epoch": 1.94, "learning_rate": 3.068350668647846e-05, "loss": 0.0199, "step": 760 }, { "epoch": 1.97, "learning_rate": 2.9940564635958396e-05, "loss": 0.0325, "step": 770 }, { "epoch": 1.99, "learning_rate": 2.9197622585438338e-05, "loss": 0.0319, "step": 780 }, { "epoch": 2.02, "learning_rate": 2.8454680534918276e-05, "loss": 0.0277, "step": 790 }, { "epoch": 2.05, "learning_rate": 2.771173848439822e-05, "loss": 0.0067, "step": 800 }, { "epoch": 2.07, "learning_rate": 2.6968796433878157e-05, "loss": 0.0046, "step": 810 }, { "epoch": 2.1, "learning_rate": 2.62258543833581e-05, "loss": 0.0093, "step": 820 }, { "epoch": 2.12, "learning_rate": 2.5482912332838038e-05, "loss": 0.0104, "step": 830 }, { "epoch": 2.15, "learning_rate": 2.473997028231798e-05, "loss": 0.0046, "step": 840 }, { "epoch": 2.17, "learning_rate": 2.3997028231797922e-05, "loss": 0.0047, "step": 850 }, { "epoch": 2.2, "learning_rate": 2.325408618127786e-05, "loss": 0.0048, "step": 860 }, { "epoch": 2.23, "learning_rate": 2.2511144130757803e-05, "loss": 0.004, "step": 870 }, { "epoch": 2.25, "learning_rate": 2.1768202080237742e-05, "loss": 0.011, "step": 880 }, { "epoch": 2.28, "learning_rate": 2.1025260029717684e-05, "loss": 0.0034, "step": 890 }, { "epoch": 2.3, "learning_rate": 2.0282317979197623e-05, "loss": 0.0068, "step": 900 }, { "epoch": 2.33, "learning_rate": 1.9539375928677565e-05, "loss": 0.01, "step": 910 }, { "epoch": 2.35, "learning_rate": 1.8796433878157504e-05, "loss": 0.0069, "step": 920 }, { "epoch": 2.38, "learning_rate": 1.8053491827637446e-05, "loss": 0.0063, "step": 930 }, { "epoch": 2.4, "learning_rate": 1.7310549777117385e-05, "loss": 0.0058, "step": 940 }, { "epoch": 2.43, "learning_rate": 1.6567607726597327e-05, "loss": 0.0053, "step": 950 }, { "epoch": 2.46, "learning_rate": 1.5824665676077265e-05, "loss": 0.0039, "step": 960 }, { "epoch": 2.48, "learning_rate": 1.5081723625557206e-05, "loss": 0.0033, "step": 970 }, { "epoch": 2.51, "learning_rate": 1.4338781575037146e-05, "loss": 0.0096, "step": 980 }, { "epoch": 2.53, "learning_rate": 1.3595839524517087e-05, "loss": 0.0132, "step": 990 }, { "epoch": 2.56, "learning_rate": 1.2852897473997027e-05, "loss": 0.003, "step": 1000 } ], "logging_steps": 10, "max_steps": 1173, "num_train_epochs": 3, "save_steps": 500, "total_flos": 9.912246111321588e+18, "trial_name": null, "trial_params": null }