{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.993660963806148, "eval_steps": 500, "global_step": 1098, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.999991813565924e-05, "loss": 2.2897, "step": 1 }, { "epoch": 0.05, "learning_rate": 3.996726317608652e-05, "loss": 1.6172, "step": 20 }, { "epoch": 0.11, "learning_rate": 3.986915987431006e-05, "loss": 1.5144, "step": 40 }, { "epoch": 0.16, "learning_rate": 3.970601125372218e-05, "loss": 1.5003, "step": 60 }, { "epoch": 0.22, "learning_rate": 3.947835141108928e-05, "loss": 1.4788, "step": 80 }, { "epoch": 0.27, "learning_rate": 3.9186925632429396e-05, "loss": 1.4834, "step": 100 }, { "epoch": 0.33, "learning_rate": 3.883268795318252e-05, "loss": 1.4782, "step": 120 }, { "epoch": 0.38, "learning_rate": 3.8416798035001545e-05, "loss": 1.4776, "step": 140 }, { "epoch": 0.44, "learning_rate": 3.794061736938837e-05, "loss": 1.4813, "step": 160 }, { "epoch": 0.49, "learning_rate": 3.740570482060311e-05, "loss": 1.4974, "step": 180 }, { "epoch": 0.55, "learning_rate": 3.681381152243763e-05, "loss": 1.4778, "step": 200 }, { "epoch": 0.6, "learning_rate": 3.6166875145559684e-05, "loss": 1.5029, "step": 220 }, { "epoch": 0.65, "learning_rate": 3.54670135541946e-05, "loss": 1.5029, "step": 240 }, { "epoch": 0.71, "learning_rate": 3.4716517872910405e-05, "loss": 1.4741, "step": 260 }, { "epoch": 0.76, "learning_rate": 3.391784498620369e-05, "loss": 1.4563, "step": 280 }, { "epoch": 0.82, "learning_rate": 3.307360949544012e-05, "loss": 1.4634, "step": 300 }, { "epoch": 0.87, "learning_rate": 3.2186575159479966e-05, "loss": 1.4616, "step": 320 }, { "epoch": 0.93, "learning_rate": 3.1259645847009384e-05, "loss": 1.4308, "step": 340 }, { "epoch": 0.98, "learning_rate": 3.0295856030196618e-05, "loss": 1.4434, "step": 360 }, { "epoch": 1.0, "eval_loss": 1.3897957801818848, "eval_runtime": 11.4488, "eval_samples_per_second": 26.204, "eval_steps_per_second": 26.204, "step": 366 }, { "epoch": 1.04, "learning_rate": 2.9298360850793944e-05, "loss": 1.1296, "step": 380 }, { "epoch": 1.09, "learning_rate": 2.827042579120562e-05, "loss": 0.9657, "step": 400 }, { "epoch": 1.15, "learning_rate": 2.721541598433567e-05, "loss": 0.9303, "step": 420 }, { "epoch": 1.2, "learning_rate": 2.613678519721155e-05, "loss": 0.9411, "step": 440 }, { "epoch": 1.25, "learning_rate": 2.5038064524447827e-05, "loss": 0.9468, "step": 460 }, { "epoch": 1.31, "learning_rate": 2.392285082856394e-05, "loss": 0.938, "step": 480 }, { "epoch": 1.36, "learning_rate": 2.2794794964998705e-05, "loss": 0.938, "step": 500 }, { "epoch": 1.42, "learning_rate": 2.1657589830369113e-05, "loss": 0.9383, "step": 520 }, { "epoch": 1.47, "learning_rate": 2.0514958273099778e-05, "loss": 0.9431, "step": 540 }, { "epoch": 1.53, "learning_rate": 1.93706409059995e-05, "loss": 0.937, "step": 560 }, { "epoch": 1.58, "learning_rate": 1.82283838606831e-05, "loss": 0.9408, "step": 580 }, { "epoch": 1.64, "learning_rate": 1.7091926523926205e-05, "loss": 0.9567, "step": 600 }, { "epoch": 1.69, "learning_rate": 1.5964989296100682e-05, "loss": 0.9302, "step": 620 }, { "epoch": 1.74, "learning_rate": 1.4851261411765414e-05, "loss": 0.9309, "step": 640 }, { "epoch": 1.8, "learning_rate": 1.375438886228411e-05, "loss": 0.9354, "step": 660 }, { "epoch": 1.85, "learning_rate": 1.2677962460007555e-05, "loss": 0.9429, "step": 680 }, { "epoch": 1.91, "learning_rate": 1.162550608309446e-05, "loss": 0.9209, "step": 700 }, { "epoch": 1.96, "learning_rate": 1.060046513945361e-05, "loss": 0.9304, "step": 720 }, { "epoch": 2.0, "eval_loss": 1.4105572700500488, "eval_runtime": 11.4541, "eval_samples_per_second": 26.191, "eval_steps_per_second": 26.191, "step": 733 }, { "epoch": 2.02, "learning_rate": 9.606195287572577e-06, "loss": 0.7909, "step": 740 }, { "epoch": 2.07, "learning_rate": 8.645951451157741e-06, "loss": 0.5917, "step": 760 }, { "epoch": 2.13, "learning_rate": 7.72287716354776e-06, "loss": 0.5678, "step": 780 }, { "epoch": 2.18, "learning_rate": 6.8399942767839075e-06, "loss": 0.5837, "step": 800 }, { "epoch": 2.24, "learning_rate": 6.000193069026181e-06, "loss": 0.5701, "step": 820 }, { "epoch": 2.29, "learning_rate": 5.206222782700667e-06, "loss": 0.5467, "step": 840 }, { "epoch": 2.34, "learning_rate": 4.460682624352952e-06, "loss": 0.5695, "step": 860 }, { "epoch": 2.4, "learning_rate": 3.766013255671479e-06, "loss": 0.5557, "step": 880 }, { "epoch": 2.45, "learning_rate": 3.1244888035362875e-06, "loss": 0.5468, "step": 900 }, { "epoch": 2.51, "learning_rate": 2.5382094152499705e-06, "loss": 0.5793, "step": 920 }, { "epoch": 2.56, "learning_rate": 2.009094383322356e-06, "loss": 0.5462, "step": 940 }, { "epoch": 2.62, "learning_rate": 1.5388758623164802e-06, "loss": 0.5617, "step": 960 }, { "epoch": 2.67, "learning_rate": 1.1290931983246334e-06, "loss": 0.5574, "step": 980 }, { "epoch": 2.73, "learning_rate": 7.810878896382101e-07, "loss": 0.5632, "step": 1000 }, { "epoch": 2.78, "learning_rate": 4.959991951083498e-07, "loss": 0.57, "step": 1020 }, { "epoch": 2.84, "learning_rate": 2.747604045743102e-07, "loss": 0.5498, "step": 1040 }, { "epoch": 2.89, "learning_rate": 1.180957835689478e-07, "loss": 0.5369, "step": 1060 }, { "epoch": 2.94, "learning_rate": 2.651820230338942e-08, "loss": 0.5651, "step": 1080 }, { "epoch": 2.99, "eval_loss": 1.5518141984939575, "eval_runtime": 11.4288, "eval_samples_per_second": 26.25, "eval_steps_per_second": 26.25, "step": 1098 }, { "epoch": 2.99, "step": 1098, "total_flos": 6.035394717233971e+16, "train_loss": 0.9967951453231506, "train_runtime": 11703.2983, "train_samples_per_second": 3.761, "train_steps_per_second": 0.094 } ], "logging_steps": 20, "max_steps": 1098, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 20, "total_flos": 6.035394717233971e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }