{
  "best_metric": 1.990099549293518,
  "best_model_checkpoint": "./outputs/checkpoint-4000",
  "epoch": 2.9143897996357016,
  "eval_steps": 100,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 2.7403,
      "step": 100
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.642388343811035,
      "eval_runtime": 205.2517,
      "eval_samples_per_second": 30.567,
      "eval_steps_per_second": 3.825,
      "step": 100
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 2.606,
      "step": 200
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.5923426151275635,
      "eval_runtime": 208.3105,
      "eval_samples_per_second": 30.119,
      "eval_steps_per_second": 3.768,
      "step": 200
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 2.5632,
      "step": 300
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.5577170848846436,
      "eval_runtime": 205.6257,
      "eval_samples_per_second": 30.512,
      "eval_steps_per_second": 3.818,
      "step": 300
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 2.5369,
      "step": 400
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.5247888565063477,
      "eval_runtime": 205.7237,
      "eval_samples_per_second": 30.497,
      "eval_steps_per_second": 3.816,
      "step": 400
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002,
      "loss": 2.4954,
      "step": 500
    },
    {
      "epoch": 0.36,
      "eval_loss": 2.498934745788574,
      "eval_runtime": 207.1055,
      "eval_samples_per_second": 30.294,
      "eval_steps_per_second": 3.79,
      "step": 500
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0002,
      "loss": 2.469,
      "step": 600
    },
    {
      "epoch": 0.44,
      "eval_loss": 2.470606565475464,
      "eval_runtime": 205.8606,
      "eval_samples_per_second": 30.477,
      "eval_steps_per_second": 3.813,
      "step": 600
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0002,
      "loss": 2.4509,
      "step": 700
    },
    {
      "epoch": 0.51,
      "eval_loss": 2.446812391281128,
      "eval_runtime": 205.2079,
      "eval_samples_per_second": 30.574,
      "eval_steps_per_second": 3.825,
      "step": 700
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0002,
      "loss": 2.4284,
      "step": 800
    },
    {
      "epoch": 0.58,
      "eval_loss": 2.42557954788208,
      "eval_runtime": 205.3478,
      "eval_samples_per_second": 30.553,
      "eval_steps_per_second": 3.823,
      "step": 800
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.0002,
      "loss": 2.3916,
      "step": 900
    },
    {
      "epoch": 0.66,
      "eval_loss": 2.403062343597412,
      "eval_runtime": 204.8842,
      "eval_samples_per_second": 30.622,
      "eval_steps_per_second": 3.831,
      "step": 900
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.0002,
      "loss": 2.3883,
      "step": 1000
    },
    {
      "epoch": 0.73,
      "eval_loss": 2.384012222290039,
      "eval_runtime": 204.4165,
      "eval_samples_per_second": 30.692,
      "eval_steps_per_second": 3.84,
      "step": 1000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0002,
      "loss": 2.382,
      "step": 1100
    },
    {
      "epoch": 0.8,
      "eval_loss": 2.361379384994507,
      "eval_runtime": 204.6076,
      "eval_samples_per_second": 30.664,
      "eval_steps_per_second": 3.837,
      "step": 1100
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0002,
      "loss": 2.3372,
      "step": 1200
    },
    {
      "epoch": 0.87,
      "eval_loss": 2.3411779403686523,
      "eval_runtime": 204.8024,
      "eval_samples_per_second": 30.634,
      "eval_steps_per_second": 3.833,
      "step": 1200
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.0002,
      "loss": 2.3496,
      "step": 1300
    },
    {
      "epoch": 0.95,
      "eval_loss": 2.3248472213745117,
      "eval_runtime": 204.1277,
      "eval_samples_per_second": 30.736,
      "eval_steps_per_second": 3.846,
      "step": 1300
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.0002,
      "loss": 2.3001,
      "step": 1400
    },
    {
      "epoch": 1.02,
      "eval_loss": 2.301698684692383,
      "eval_runtime": 204.3213,
      "eval_samples_per_second": 30.707,
      "eval_steps_per_second": 3.842,
      "step": 1400
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.0002,
      "loss": 2.2598,
      "step": 1500
    },
    {
      "epoch": 1.09,
      "eval_loss": 2.284607172012329,
      "eval_runtime": 204.5306,
      "eval_samples_per_second": 30.675,
      "eval_steps_per_second": 3.838,
      "step": 1500
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.0002,
      "loss": 2.2518,
      "step": 1600
    },
    {
      "epoch": 1.17,
      "eval_loss": 2.2680861949920654,
      "eval_runtime": 204.3902,
      "eval_samples_per_second": 30.696,
      "eval_steps_per_second": 3.841,
      "step": 1600
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.0002,
      "loss": 2.2518,
      "step": 1700
    },
    {
      "epoch": 1.24,
      "eval_loss": 2.2509853839874268,
      "eval_runtime": 204.5985,
      "eval_samples_per_second": 30.665,
      "eval_steps_per_second": 3.837,
      "step": 1700
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.0002,
      "loss": 2.2139,
      "step": 1800
    },
    {
      "epoch": 1.31,
      "eval_loss": 2.2367281913757324,
      "eval_runtime": 206.3249,
      "eval_samples_per_second": 30.408,
      "eval_steps_per_second": 3.805,
      "step": 1800
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.0002,
      "loss": 2.1986,
      "step": 1900
    },
    {
      "epoch": 1.38,
      "eval_loss": 2.222033977508545,
      "eval_runtime": 205.0156,
      "eval_samples_per_second": 30.603,
      "eval_steps_per_second": 3.829,
      "step": 1900
    },
    {
      "epoch": 1.46,
      "learning_rate": 0.0002,
      "loss": 2.1927,
      "step": 2000
    },
    {
      "epoch": 1.46,
      "eval_loss": 2.2064921855926514,
      "eval_runtime": 205.4048,
      "eval_samples_per_second": 30.545,
      "eval_steps_per_second": 3.822,
      "step": 2000
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.0002,
      "loss": 2.1762,
      "step": 2100
    },
    {
      "epoch": 1.53,
      "eval_loss": 2.1937856674194336,
      "eval_runtime": 204.8721,
      "eval_samples_per_second": 30.624,
      "eval_steps_per_second": 3.832,
      "step": 2100
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0002,
      "loss": 2.1584,
      "step": 2200
    },
    {
      "epoch": 1.6,
      "eval_loss": 2.1781294345855713,
      "eval_runtime": 204.973,
      "eval_samples_per_second": 30.609,
      "eval_steps_per_second": 3.83,
      "step": 2200
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.0002,
      "loss": 2.1481,
      "step": 2300
    },
    {
      "epoch": 1.68,
      "eval_loss": 2.1652047634124756,
      "eval_runtime": 204.8186,
      "eval_samples_per_second": 30.632,
      "eval_steps_per_second": 3.833,
      "step": 2300
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.0002,
      "loss": 2.1553,
      "step": 2400
    },
    {
      "epoch": 1.75,
      "eval_loss": 2.1539299488067627,
      "eval_runtime": 204.9017,
      "eval_samples_per_second": 30.62,
      "eval_steps_per_second": 3.831,
      "step": 2400
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.0002,
      "loss": 2.1242,
      "step": 2500
    },
    {
      "epoch": 1.82,
      "eval_loss": 2.140552520751953,
      "eval_runtime": 204.6745,
      "eval_samples_per_second": 30.654,
      "eval_steps_per_second": 3.835,
      "step": 2500
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.0002,
      "loss": 2.1241,
      "step": 2600
    },
    {
      "epoch": 1.89,
      "eval_loss": 2.129112482070923,
      "eval_runtime": 204.7891,
      "eval_samples_per_second": 30.636,
      "eval_steps_per_second": 3.833,
      "step": 2600
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.0002,
      "loss": 2.108,
      "step": 2700
    },
    {
      "epoch": 1.97,
      "eval_loss": 2.1130168437957764,
      "eval_runtime": 204.8827,
      "eval_samples_per_second": 30.622,
      "eval_steps_per_second": 3.831,
      "step": 2700
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.0002,
      "loss": 2.0624,
      "step": 2800
    },
    {
      "epoch": 2.04,
      "eval_loss": 2.1079933643341064,
      "eval_runtime": 205.1262,
      "eval_samples_per_second": 30.586,
      "eval_steps_per_second": 3.827,
      "step": 2800
    },
    {
      "epoch": 2.11,
      "learning_rate": 0.0002,
      "loss": 2.0368,
      "step": 2900
    },
    {
      "epoch": 2.11,
      "eval_loss": 2.0953664779663086,
      "eval_runtime": 204.7517,
      "eval_samples_per_second": 30.642,
      "eval_steps_per_second": 3.834,
      "step": 2900
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.0002,
      "loss": 2.0563,
      "step": 3000
    },
    {
      "epoch": 2.19,
      "eval_loss": 2.0847907066345215,
      "eval_runtime": 205.0373,
      "eval_samples_per_second": 30.599,
      "eval_steps_per_second": 3.829,
      "step": 3000
    },
    {
      "epoch": 2.26,
      "learning_rate": 0.0002,
      "loss": 2.0254,
      "step": 3100
    },
    {
      "epoch": 2.26,
      "eval_loss": 2.0736167430877686,
      "eval_runtime": 205.2299,
      "eval_samples_per_second": 30.571,
      "eval_steps_per_second": 3.825,
      "step": 3100
    },
    {
      "epoch": 2.33,
      "learning_rate": 0.0002,
      "loss": 2.0397,
      "step": 3200
    },
    {
      "epoch": 2.33,
      "eval_loss": 2.0629310607910156,
      "eval_runtime": 205.2704,
      "eval_samples_per_second": 30.565,
      "eval_steps_per_second": 3.824,
      "step": 3200
    },
    {
      "epoch": 2.4,
      "learning_rate": 0.0002,
      "loss": 2.0083,
      "step": 3300
    },
    {
      "epoch": 2.4,
      "eval_loss": 2.0515356063842773,
      "eval_runtime": 205.1563,
      "eval_samples_per_second": 30.582,
      "eval_steps_per_second": 3.826,
      "step": 3300
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.0002,
      "loss": 2.0018,
      "step": 3400
    },
    {
      "epoch": 2.48,
      "eval_loss": 2.0455892086029053,
      "eval_runtime": 280.8694,
      "eval_samples_per_second": 22.338,
      "eval_steps_per_second": 2.795,
      "step": 3400
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.0002,
      "loss": 2.0001,
      "step": 3500
    },
    {
      "epoch": 2.55,
      "eval_loss": 2.0334508419036865,
      "eval_runtime": 220.5599,
      "eval_samples_per_second": 28.446,
      "eval_steps_per_second": 3.559,
      "step": 3500
    },
    {
      "epoch": 2.62,
      "learning_rate": 0.0002,
      "loss": 1.9962,
      "step": 3600
    },
    {
      "epoch": 2.62,
      "eval_loss": 2.0254855155944824,
      "eval_runtime": 204.9938,
      "eval_samples_per_second": 30.606,
      "eval_steps_per_second": 3.829,
      "step": 3600
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.0002,
      "loss": 1.9767,
      "step": 3700
    },
    {
      "epoch": 2.7,
      "eval_loss": 2.015181303024292,
      "eval_runtime": 205.3577,
      "eval_samples_per_second": 30.552,
      "eval_steps_per_second": 3.823,
      "step": 3700
    },
    {
      "epoch": 2.77,
      "learning_rate": 0.0002,
      "loss": 1.9679,
      "step": 3800
    },
    {
      "epoch": 2.77,
      "eval_loss": 2.00793194770813,
      "eval_runtime": 205.2214,
      "eval_samples_per_second": 30.572,
      "eval_steps_per_second": 3.825,
      "step": 3800
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.0002,
      "loss": 1.9555,
      "step": 3900
    },
    {
      "epoch": 2.84,
      "eval_loss": 1.9973456859588623,
      "eval_runtime": 205.4846,
      "eval_samples_per_second": 30.533,
      "eval_steps_per_second": 3.82,
      "step": 3900
    },
    {
      "epoch": 2.91,
      "learning_rate": 0.0002,
      "loss": 1.9549,
      "step": 4000
    },
    {
      "epoch": 2.91,
      "eval_loss": 1.990099549293518,
      "eval_runtime": 205.2933,
      "eval_samples_per_second": 30.561,
      "eval_steps_per_second": 3.824,
      "step": 4000
    }
  ],
  "logging_steps": 100,
  "max_steps": 4116,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 1.1667957018181632e+17,
  "trial_name": null,
  "trial_params": null
}