|
{ |
|
"best_metric": 2.0894112586975098, |
|
"best_model_checkpoint": "./outputs/checkpoint-3700", |
|
"epoch": 2.6958105646630237, |
|
"eval_steps": 100, |
|
"global_step": 3700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 2.8181, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.7240540981292725, |
|
"eval_runtime": 204.8489, |
|
"eval_samples_per_second": 30.627, |
|
"eval_steps_per_second": 3.832, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 2.6957, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.6725997924804688, |
|
"eval_runtime": 204.875, |
|
"eval_samples_per_second": 30.624, |
|
"eval_steps_per_second": 3.832, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 2.6509, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.6377696990966797, |
|
"eval_runtime": 204.8779, |
|
"eval_samples_per_second": 30.623, |
|
"eval_steps_per_second": 3.832, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 2.6248, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 2.6048855781555176, |
|
"eval_runtime": 204.7551, |
|
"eval_samples_per_second": 30.641, |
|
"eval_steps_per_second": 3.834, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5822, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 2.5791709423065186, |
|
"eval_runtime": 204.976, |
|
"eval_samples_per_second": 30.608, |
|
"eval_steps_per_second": 3.83, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5557, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 2.5526647567749023, |
|
"eval_runtime": 208.5751, |
|
"eval_samples_per_second": 30.08, |
|
"eval_steps_per_second": 3.764, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5397, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.5280492305755615, |
|
"eval_runtime": 204.8306, |
|
"eval_samples_per_second": 30.63, |
|
"eval_steps_per_second": 3.832, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5165, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 2.507997512817383, |
|
"eval_runtime": 204.6726, |
|
"eval_samples_per_second": 30.654, |
|
"eval_steps_per_second": 3.835, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 2.478, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 2.485630512237549, |
|
"eval_runtime": 204.5609, |
|
"eval_samples_per_second": 30.671, |
|
"eval_steps_per_second": 3.837, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4748, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 2.4663589000701904, |
|
"eval_runtime": 204.9278, |
|
"eval_samples_per_second": 30.616, |
|
"eval_steps_per_second": 3.831, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4692, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 2.4438841342926025, |
|
"eval_runtime": 204.9158, |
|
"eval_samples_per_second": 30.617, |
|
"eval_steps_per_second": 3.831, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4221, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 2.42663836479187, |
|
"eval_runtime": 204.7233, |
|
"eval_samples_per_second": 30.646, |
|
"eval_steps_per_second": 3.834, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002, |
|
"loss": 2.436, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 2.4082868099212646, |
|
"eval_runtime": 204.6174, |
|
"eval_samples_per_second": 30.662, |
|
"eval_steps_per_second": 3.836, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3811, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 2.3840975761413574, |
|
"eval_runtime": 204.8146, |
|
"eval_samples_per_second": 30.633, |
|
"eval_steps_per_second": 3.833, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3425, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 2.3692121505737305, |
|
"eval_runtime": 204.8938, |
|
"eval_samples_per_second": 30.621, |
|
"eval_steps_per_second": 3.831, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3349, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 2.3504340648651123, |
|
"eval_runtime": 205.0412, |
|
"eval_samples_per_second": 30.599, |
|
"eval_steps_per_second": 3.828, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3358, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 2.3324663639068604, |
|
"eval_runtime": 205.0224, |
|
"eval_samples_per_second": 30.602, |
|
"eval_steps_per_second": 3.829, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2973, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.3180932998657227, |
|
"eval_runtime": 204.7898, |
|
"eval_samples_per_second": 30.636, |
|
"eval_steps_per_second": 3.833, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2809, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 2.302474021911621, |
|
"eval_runtime": 205.1402, |
|
"eval_samples_per_second": 30.584, |
|
"eval_steps_per_second": 3.827, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2762, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 2.28745436668396, |
|
"eval_runtime": 205.0203, |
|
"eval_samples_per_second": 30.602, |
|
"eval_steps_per_second": 3.829, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2577, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 2.273569107055664, |
|
"eval_runtime": 205.4686, |
|
"eval_samples_per_second": 30.535, |
|
"eval_steps_per_second": 3.821, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2406, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 2.258324384689331, |
|
"eval_runtime": 205.2879, |
|
"eval_samples_per_second": 30.562, |
|
"eval_steps_per_second": 3.824, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2328, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.246018409729004, |
|
"eval_runtime": 205.2551, |
|
"eval_samples_per_second": 30.567, |
|
"eval_steps_per_second": 3.825, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0002, |
|
"loss": 2.236, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 2.2329859733581543, |
|
"eval_runtime": 205.4741, |
|
"eval_samples_per_second": 30.534, |
|
"eval_steps_per_second": 3.82, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2046, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 2.2189931869506836, |
|
"eval_runtime": 205.4106, |
|
"eval_samples_per_second": 30.544, |
|
"eval_steps_per_second": 3.822, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2048, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 2.206812858581543, |
|
"eval_runtime": 204.865, |
|
"eval_samples_per_second": 30.625, |
|
"eval_steps_per_second": 3.832, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0002, |
|
"loss": 2.188, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 2.1901111602783203, |
|
"eval_runtime": 205.0397, |
|
"eval_samples_per_second": 30.599, |
|
"eval_steps_per_second": 3.829, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0002, |
|
"loss": 2.145, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 2.1863455772399902, |
|
"eval_runtime": 205.3424, |
|
"eval_samples_per_second": 30.554, |
|
"eval_steps_per_second": 3.823, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0002, |
|
"loss": 2.116, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 2.171632766723633, |
|
"eval_runtime": 205.19, |
|
"eval_samples_per_second": 30.577, |
|
"eval_steps_per_second": 3.826, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1341, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 2.162501096725464, |
|
"eval_runtime": 204.9372, |
|
"eval_samples_per_second": 30.614, |
|
"eval_steps_per_second": 3.83, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1015, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 2.149629831314087, |
|
"eval_runtime": 205.2556, |
|
"eval_samples_per_second": 30.567, |
|
"eval_steps_per_second": 3.825, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0002, |
|
"loss": 2.122, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 2.139963388442993, |
|
"eval_runtime": 205.3965, |
|
"eval_samples_per_second": 30.546, |
|
"eval_steps_per_second": 3.822, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0835, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 2.128814935684204, |
|
"eval_runtime": 205.0918, |
|
"eval_samples_per_second": 30.591, |
|
"eval_steps_per_second": 3.828, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0783, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 2.1278133392333984, |
|
"eval_runtime": 205.2987, |
|
"eval_samples_per_second": 30.56, |
|
"eval_steps_per_second": 3.824, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0761, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 2.1079277992248535, |
|
"eval_runtime": 205.5563, |
|
"eval_samples_per_second": 30.522, |
|
"eval_steps_per_second": 3.819, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0002, |
|
"loss": 2.076, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 2.09975004196167, |
|
"eval_runtime": 206.3028, |
|
"eval_samples_per_second": 30.412, |
|
"eval_steps_per_second": 3.805, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0554, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 2.0894112586975098, |
|
"eval_runtime": 205.5534, |
|
"eval_samples_per_second": 30.522, |
|
"eval_steps_per_second": 3.819, |
|
"step": 3700 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4116, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 1.0533333313456128e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|