|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.993660963806148, |
|
"eval_steps": 500, |
|
"global_step": 1098, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.999991813565924e-05, |
|
"loss": 2.2897, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.996726317608652e-05, |
|
"loss": 1.6172, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.986915987431006e-05, |
|
"loss": 1.5144, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.970601125372218e-05, |
|
"loss": 1.5003, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.947835141108928e-05, |
|
"loss": 1.4788, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.9186925632429396e-05, |
|
"loss": 1.4834, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.883268795318252e-05, |
|
"loss": 1.4782, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8416798035001545e-05, |
|
"loss": 1.4776, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.794061736938837e-05, |
|
"loss": 1.4813, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.740570482060311e-05, |
|
"loss": 1.4974, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.681381152243763e-05, |
|
"loss": 1.4778, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.6166875145559684e-05, |
|
"loss": 1.5029, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.54670135541946e-05, |
|
"loss": 1.5029, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.4716517872910405e-05, |
|
"loss": 1.4741, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.391784498620369e-05, |
|
"loss": 1.4563, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.307360949544012e-05, |
|
"loss": 1.4634, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.2186575159479966e-05, |
|
"loss": 1.4616, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.1259645847009384e-05, |
|
"loss": 1.4308, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.0295856030196618e-05, |
|
"loss": 1.4434, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.3897957801818848, |
|
"eval_runtime": 11.4488, |
|
"eval_samples_per_second": 26.204, |
|
"eval_steps_per_second": 26.204, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.9298360850793944e-05, |
|
"loss": 1.1296, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.827042579120562e-05, |
|
"loss": 0.9657, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.721541598433567e-05, |
|
"loss": 0.9303, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.613678519721155e-05, |
|
"loss": 0.9411, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.5038064524447827e-05, |
|
"loss": 0.9468, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.392285082856394e-05, |
|
"loss": 0.938, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.2794794964998705e-05, |
|
"loss": 0.938, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.1657589830369113e-05, |
|
"loss": 0.9383, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.0514958273099778e-05, |
|
"loss": 0.9431, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.93706409059995e-05, |
|
"loss": 0.937, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.82283838606831e-05, |
|
"loss": 0.9408, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.7091926523926205e-05, |
|
"loss": 0.9567, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.5964989296100682e-05, |
|
"loss": 0.9302, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.4851261411765414e-05, |
|
"loss": 0.9309, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.375438886228411e-05, |
|
"loss": 0.9354, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.2677962460007555e-05, |
|
"loss": 0.9429, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.162550608309446e-05, |
|
"loss": 0.9209, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.060046513945361e-05, |
|
"loss": 0.9304, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.4105572700500488, |
|
"eval_runtime": 11.4541, |
|
"eval_samples_per_second": 26.191, |
|
"eval_steps_per_second": 26.191, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.606195287572577e-06, |
|
"loss": 0.7909, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 8.645951451157741e-06, |
|
"loss": 0.5917, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 7.72287716354776e-06, |
|
"loss": 0.5678, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 6.8399942767839075e-06, |
|
"loss": 0.5837, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 6.000193069026181e-06, |
|
"loss": 0.5701, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 5.206222782700667e-06, |
|
"loss": 0.5467, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.460682624352952e-06, |
|
"loss": 0.5695, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.766013255671479e-06, |
|
"loss": 0.5557, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.1244888035362875e-06, |
|
"loss": 0.5468, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.5382094152499705e-06, |
|
"loss": 0.5793, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.009094383322356e-06, |
|
"loss": 0.5462, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.5388758623164802e-06, |
|
"loss": 0.5617, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.1290931983246334e-06, |
|
"loss": 0.5574, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 7.810878896382101e-07, |
|
"loss": 0.5632, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.959991951083498e-07, |
|
"loss": 0.57, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.747604045743102e-07, |
|
"loss": 0.5498, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.180957835689478e-07, |
|
"loss": 0.5369, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.651820230338942e-08, |
|
"loss": 0.5651, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 1.5518141984939575, |
|
"eval_runtime": 11.4288, |
|
"eval_samples_per_second": 26.25, |
|
"eval_steps_per_second": 26.25, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"step": 1098, |
|
"total_flos": 6.035394717233971e+16, |
|
"train_loss": 0.9967951453231506, |
|
"train_runtime": 11703.2983, |
|
"train_samples_per_second": 3.761, |
|
"train_steps_per_second": 0.094 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 1098, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 20, |
|
"total_flos": 6.035394717233971e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|