|
{ |
|
"best_metric": 2.044205904006958, |
|
"best_model_checkpoint": "./outputs/checkpoint-4100", |
|
"epoch": 2.987249544626594, |
|
"eval_steps": 100, |
|
"global_step": 4100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 2.8176, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.7248005867004395, |
|
"eval_runtime": 204.526, |
|
"eval_samples_per_second": 30.676, |
|
"eval_steps_per_second": 3.838, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 2.6959, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.6743462085723877, |
|
"eval_runtime": 205.6888, |
|
"eval_samples_per_second": 30.502, |
|
"eval_steps_per_second": 3.816, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 2.6512, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.6400036811828613, |
|
"eval_runtime": 204.5763, |
|
"eval_samples_per_second": 30.668, |
|
"eval_steps_per_second": 3.837, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 2.6241, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 2.6055357456207275, |
|
"eval_runtime": 204.4295, |
|
"eval_samples_per_second": 30.69, |
|
"eval_steps_per_second": 3.84, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5812, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 2.5787856578826904, |
|
"eval_runtime": 204.1968, |
|
"eval_samples_per_second": 30.725, |
|
"eval_steps_per_second": 3.844, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 2.553, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 2.552586555480957, |
|
"eval_runtime": 204.5367, |
|
"eval_samples_per_second": 30.674, |
|
"eval_steps_per_second": 3.838, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5368, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.527393341064453, |
|
"eval_runtime": 204.8117, |
|
"eval_samples_per_second": 30.633, |
|
"eval_steps_per_second": 3.833, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5124, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 2.506849527359009, |
|
"eval_runtime": 204.0454, |
|
"eval_samples_per_second": 30.748, |
|
"eval_steps_per_second": 3.847, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4756, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 2.4826207160949707, |
|
"eval_runtime": 204.4447, |
|
"eval_samples_per_second": 30.688, |
|
"eval_steps_per_second": 3.84, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4724, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 2.463895559310913, |
|
"eval_runtime": 204.6362, |
|
"eval_samples_per_second": 30.659, |
|
"eval_steps_per_second": 3.836, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4666, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 2.439748525619507, |
|
"eval_runtime": 204.5892, |
|
"eval_samples_per_second": 30.666, |
|
"eval_steps_per_second": 3.837, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4189, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 2.421973943710327, |
|
"eval_runtime": 204.4873, |
|
"eval_samples_per_second": 30.682, |
|
"eval_steps_per_second": 3.839, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4317, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 2.4035205841064453, |
|
"eval_runtime": 204.5174, |
|
"eval_samples_per_second": 30.677, |
|
"eval_steps_per_second": 3.838, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3762, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 2.379911422729492, |
|
"eval_runtime": 204.9943, |
|
"eval_samples_per_second": 30.606, |
|
"eval_steps_per_second": 3.829, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002, |
|
"loss": 2.337, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 2.3603107929229736, |
|
"eval_runtime": 204.9808, |
|
"eval_samples_per_second": 30.608, |
|
"eval_steps_per_second": 3.83, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3297, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 2.3435933589935303, |
|
"eval_runtime": 204.6503, |
|
"eval_samples_per_second": 30.657, |
|
"eval_steps_per_second": 3.836, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3291, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 2.325985908508301, |
|
"eval_runtime": 204.8064, |
|
"eval_samples_per_second": 30.634, |
|
"eval_steps_per_second": 3.833, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2916, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.3098464012145996, |
|
"eval_runtime": 204.5629, |
|
"eval_samples_per_second": 30.67, |
|
"eval_steps_per_second": 3.837, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2727, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 2.2953364849090576, |
|
"eval_runtime": 204.8297, |
|
"eval_samples_per_second": 30.63, |
|
"eval_steps_per_second": 3.832, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2697, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 2.279343843460083, |
|
"eval_runtime": 205.0674, |
|
"eval_samples_per_second": 30.595, |
|
"eval_steps_per_second": 3.828, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2494, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 2.266407012939453, |
|
"eval_runtime": 204.6679, |
|
"eval_samples_per_second": 30.655, |
|
"eval_steps_per_second": 3.835, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 2.232, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 2.24958872795105, |
|
"eval_runtime": 204.8091, |
|
"eval_samples_per_second": 30.633, |
|
"eval_steps_per_second": 3.833, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2233, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.2403359413146973, |
|
"eval_runtime": 204.8335, |
|
"eval_samples_per_second": 30.63, |
|
"eval_steps_per_second": 3.832, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2295, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 2.2235193252563477, |
|
"eval_runtime": 204.6216, |
|
"eval_samples_per_second": 30.661, |
|
"eval_steps_per_second": 3.836, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1957, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 2.2098121643066406, |
|
"eval_runtime": 204.6979, |
|
"eval_samples_per_second": 30.65, |
|
"eval_steps_per_second": 3.835, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1964, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 2.197718620300293, |
|
"eval_runtime": 204.9105, |
|
"eval_samples_per_second": 30.618, |
|
"eval_steps_per_second": 3.831, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1798, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 2.1822831630706787, |
|
"eval_runtime": 204.5843, |
|
"eval_samples_per_second": 30.667, |
|
"eval_steps_per_second": 3.837, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0002, |
|
"loss": 2.136, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 2.1763219833374023, |
|
"eval_runtime": 204.7219, |
|
"eval_samples_per_second": 30.646, |
|
"eval_steps_per_second": 3.834, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1076, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 2.1633405685424805, |
|
"eval_runtime": 205.0465, |
|
"eval_samples_per_second": 30.598, |
|
"eval_steps_per_second": 3.828, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1237, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 2.152331590652466, |
|
"eval_runtime": 204.9684, |
|
"eval_samples_per_second": 30.61, |
|
"eval_steps_per_second": 3.83, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0927, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 2.139225959777832, |
|
"eval_runtime": 205.1929, |
|
"eval_samples_per_second": 30.576, |
|
"eval_steps_per_second": 3.826, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1127, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 2.1339216232299805, |
|
"eval_runtime": 205.3153, |
|
"eval_samples_per_second": 30.558, |
|
"eval_steps_per_second": 3.823, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0736, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 2.1192281246185303, |
|
"eval_runtime": 208.2692, |
|
"eval_samples_per_second": 30.124, |
|
"eval_steps_per_second": 3.769, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0687, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 2.114696979522705, |
|
"eval_runtime": 205.5149, |
|
"eval_samples_per_second": 30.528, |
|
"eval_steps_per_second": 3.82, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0674, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 2.0993354320526123, |
|
"eval_runtime": 205.1654, |
|
"eval_samples_per_second": 30.58, |
|
"eval_steps_per_second": 3.826, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0638, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 2.08911395072937, |
|
"eval_runtime": 206.3208, |
|
"eval_samples_per_second": 30.409, |
|
"eval_steps_per_second": 3.805, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0461, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 2.078380823135376, |
|
"eval_runtime": 205.4384, |
|
"eval_samples_per_second": 30.54, |
|
"eval_steps_per_second": 3.821, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0322, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 2.0717663764953613, |
|
"eval_runtime": 205.3942, |
|
"eval_samples_per_second": 30.546, |
|
"eval_steps_per_second": 3.822, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0247, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 2.0614216327667236, |
|
"eval_runtime": 205.4985, |
|
"eval_samples_per_second": 30.531, |
|
"eval_steps_per_second": 3.82, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0234, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_loss": 2.053968906402588, |
|
"eval_runtime": 205.7574, |
|
"eval_samples_per_second": 30.492, |
|
"eval_steps_per_second": 3.815, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0268, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 2.044205904006958, |
|
"eval_runtime": 205.9228, |
|
"eval_samples_per_second": 30.468, |
|
"eval_steps_per_second": 3.812, |
|
"step": 4100 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4116, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 1.1671768877819904e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|