{
  "best_metric": 2.197718620300293,
  "best_model_checkpoint": "./outputs/checkpoint-2600",
  "epoch": 1.8943533697632058,
  "eval_steps": 100,
  "global_step": 2600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 2.8176,
      "step": 100
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.7248005867004395,
      "eval_runtime": 204.526,
      "eval_samples_per_second": 30.676,
      "eval_steps_per_second": 3.838,
      "step": 100
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 2.6959,
      "step": 200
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.6743462085723877,
      "eval_runtime": 205.6888,
      "eval_samples_per_second": 30.502,
      "eval_steps_per_second": 3.816,
      "step": 200
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 2.6512,
      "step": 300
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.6400036811828613,
      "eval_runtime": 204.5763,
      "eval_samples_per_second": 30.668,
      "eval_steps_per_second": 3.837,
      "step": 300
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 2.6241,
      "step": 400
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.6055357456207275,
      "eval_runtime": 204.4295,
      "eval_samples_per_second": 30.69,
      "eval_steps_per_second": 3.84,
      "step": 400
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002,
      "loss": 2.5812,
      "step": 500
    },
    {
      "epoch": 0.36,
      "eval_loss": 2.5787856578826904,
      "eval_runtime": 204.1968,
      "eval_samples_per_second": 30.725,
      "eval_steps_per_second": 3.844,
      "step": 500
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0002,
      "loss": 2.553,
      "step": 600
    },
    {
      "epoch": 0.44,
      "eval_loss": 2.552586555480957,
      "eval_runtime": 204.5367,
      "eval_samples_per_second": 30.674,
      "eval_steps_per_second": 3.838,
      "step": 600
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0002,
      "loss": 2.5368,
      "step": 700
    },
    {
      "epoch": 0.51,
      "eval_loss": 2.527393341064453,
      "eval_runtime": 204.8117,
      "eval_samples_per_second": 30.633,
      "eval_steps_per_second": 3.833,
      "step": 700
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0002,
      "loss": 2.5124,
      "step": 800
    },
    {
      "epoch": 0.58,
      "eval_loss": 2.506849527359009,
      "eval_runtime": 204.0454,
      "eval_samples_per_second": 30.748,
      "eval_steps_per_second": 3.847,
      "step": 800
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.0002,
      "loss": 2.4756,
      "step": 900
    },
    {
      "epoch": 0.66,
      "eval_loss": 2.4826207160949707,
      "eval_runtime": 204.4447,
      "eval_samples_per_second": 30.688,
      "eval_steps_per_second": 3.84,
      "step": 900
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.0002,
      "loss": 2.4724,
      "step": 1000
    },
    {
      "epoch": 0.73,
      "eval_loss": 2.463895559310913,
      "eval_runtime": 204.6362,
      "eval_samples_per_second": 30.659,
      "eval_steps_per_second": 3.836,
      "step": 1000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0002,
      "loss": 2.4666,
      "step": 1100
    },
    {
      "epoch": 0.8,
      "eval_loss": 2.439748525619507,
      "eval_runtime": 204.5892,
      "eval_samples_per_second": 30.666,
      "eval_steps_per_second": 3.837,
      "step": 1100
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0002,
      "loss": 2.4189,
      "step": 1200
    },
    {
      "epoch": 0.87,
      "eval_loss": 2.421973943710327,
      "eval_runtime": 204.4873,
      "eval_samples_per_second": 30.682,
      "eval_steps_per_second": 3.839,
      "step": 1200
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.0002,
      "loss": 2.4317,
      "step": 1300
    },
    {
      "epoch": 0.95,
      "eval_loss": 2.4035205841064453,
      "eval_runtime": 204.5174,
      "eval_samples_per_second": 30.677,
      "eval_steps_per_second": 3.838,
      "step": 1300
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.0002,
      "loss": 2.3762,
      "step": 1400
    },
    {
      "epoch": 1.02,
      "eval_loss": 2.379911422729492,
      "eval_runtime": 204.9943,
      "eval_samples_per_second": 30.606,
      "eval_steps_per_second": 3.829,
      "step": 1400
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.0002,
      "loss": 2.337,
      "step": 1500
    },
    {
      "epoch": 1.09,
      "eval_loss": 2.3603107929229736,
      "eval_runtime": 204.9808,
      "eval_samples_per_second": 30.608,
      "eval_steps_per_second": 3.83,
      "step": 1500
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.0002,
      "loss": 2.3297,
      "step": 1600
    },
    {
      "epoch": 1.17,
      "eval_loss": 2.3435933589935303,
      "eval_runtime": 204.6503,
      "eval_samples_per_second": 30.657,
      "eval_steps_per_second": 3.836,
      "step": 1600
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.0002,
      "loss": 2.3291,
      "step": 1700
    },
    {
      "epoch": 1.24,
      "eval_loss": 2.325985908508301,
      "eval_runtime": 204.8064,
      "eval_samples_per_second": 30.634,
      "eval_steps_per_second": 3.833,
      "step": 1700
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.0002,
      "loss": 2.2916,
      "step": 1800
    },
    {
      "epoch": 1.31,
      "eval_loss": 2.3098464012145996,
      "eval_runtime": 204.5629,
      "eval_samples_per_second": 30.67,
      "eval_steps_per_second": 3.837,
      "step": 1800
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.0002,
      "loss": 2.2727,
      "step": 1900
    },
    {
      "epoch": 1.38,
      "eval_loss": 2.2953364849090576,
      "eval_runtime": 204.8297,
      "eval_samples_per_second": 30.63,
      "eval_steps_per_second": 3.832,
      "step": 1900
    },
    {
      "epoch": 1.46,
      "learning_rate": 0.0002,
      "loss": 2.2697,
      "step": 2000
    },
    {
      "epoch": 1.46,
      "eval_loss": 2.279343843460083,
      "eval_runtime": 205.0674,
      "eval_samples_per_second": 30.595,
      "eval_steps_per_second": 3.828,
      "step": 2000
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.0002,
      "loss": 2.2494,
      "step": 2100
    },
    {
      "epoch": 1.53,
      "eval_loss": 2.266407012939453,
      "eval_runtime": 204.6679,
      "eval_samples_per_second": 30.655,
      "eval_steps_per_second": 3.835,
      "step": 2100
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0002,
      "loss": 2.232,
      "step": 2200
    },
    {
      "epoch": 1.6,
      "eval_loss": 2.24958872795105,
      "eval_runtime": 204.8091,
      "eval_samples_per_second": 30.633,
      "eval_steps_per_second": 3.833,
      "step": 2200
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.0002,
      "loss": 2.2233,
      "step": 2300
    },
    {
      "epoch": 1.68,
      "eval_loss": 2.2403359413146973,
      "eval_runtime": 204.8335,
      "eval_samples_per_second": 30.63,
      "eval_steps_per_second": 3.832,
      "step": 2300
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.0002,
      "loss": 2.2295,
      "step": 2400
    },
    {
      "epoch": 1.75,
      "eval_loss": 2.2235193252563477,
      "eval_runtime": 204.6216,
      "eval_samples_per_second": 30.661,
      "eval_steps_per_second": 3.836,
      "step": 2400
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.0002,
      "loss": 2.1957,
      "step": 2500
    },
    {
      "epoch": 1.82,
      "eval_loss": 2.2098121643066406,
      "eval_runtime": 204.6979,
      "eval_samples_per_second": 30.65,
      "eval_steps_per_second": 3.835,
      "step": 2500
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.0002,
      "loss": 2.1964,
      "step": 2600
    },
    {
      "epoch": 1.89,
      "eval_loss": 2.197718620300293,
      "eval_runtime": 204.9105,
      "eval_samples_per_second": 30.618,
      "eval_steps_per_second": 3.831,
      "step": 2600
    }
  ],
  "logging_steps": 100,
  "max_steps": 4116,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 7.408408364292096e+16,
  "trial_name": null,
  "trial_params": null
}