|
{ |
|
"best_metric": 2.4719741344451904, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-25", |
|
"epoch": 0.053966540744738264, |
|
"eval_steps": 5, |
|
"global_step": 25, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0021586616297895305, |
|
"grad_norm": 0.4805165231227875, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 2.3053, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0021586616297895305, |
|
"eval_loss": 2.4950923919677734, |
|
"eval_runtime": 35.2547, |
|
"eval_samples_per_second": 5.531, |
|
"eval_steps_per_second": 2.78, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004317323259579061, |
|
"grad_norm": 0.5265409350395203, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 2.4779, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0064759848893685915, |
|
"grad_norm": 0.4193825423717499, |
|
"learning_rate": 6e-06, |
|
"loss": 2.3878, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.008634646519158122, |
|
"grad_norm": 0.4574205279350281, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 2.4617, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.010793308148947653, |
|
"grad_norm": 0.5360931754112244, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5719, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.010793308148947653, |
|
"eval_loss": 2.4947731494903564, |
|
"eval_runtime": 34.8268, |
|
"eval_samples_per_second": 5.599, |
|
"eval_steps_per_second": 2.814, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.012951969778737183, |
|
"grad_norm": 0.5430278182029724, |
|
"learning_rate": 1.2e-05, |
|
"loss": 2.6329, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.015110631408526714, |
|
"grad_norm": 0.44120287895202637, |
|
"learning_rate": 1.4e-05, |
|
"loss": 2.4466, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.017269293038316244, |
|
"grad_norm": 0.44740861654281616, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 2.2921, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.019427954668105773, |
|
"grad_norm": 0.482614666223526, |
|
"learning_rate": 1.8e-05, |
|
"loss": 2.4402, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.021586616297895305, |
|
"grad_norm": 0.4554874002933502, |
|
"learning_rate": 2e-05, |
|
"loss": 2.6203, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.021586616297895305, |
|
"eval_loss": 2.4917423725128174, |
|
"eval_runtime": 33.0385, |
|
"eval_samples_per_second": 5.902, |
|
"eval_steps_per_second": 2.966, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.023745277927684834, |
|
"grad_norm": 0.4039216935634613, |
|
"learning_rate": 1.9781476007338058e-05, |
|
"loss": 2.3033, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.025903939557474366, |
|
"grad_norm": 0.4976852536201477, |
|
"learning_rate": 1.913545457642601e-05, |
|
"loss": 2.5265, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.028062601187263895, |
|
"grad_norm": 0.40776801109313965, |
|
"learning_rate": 1.8090169943749477e-05, |
|
"loss": 2.4908, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.030221262817053427, |
|
"grad_norm": 0.49444299936294556, |
|
"learning_rate": 1.6691306063588583e-05, |
|
"loss": 2.476, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.032379924446842956, |
|
"grad_norm": 0.6038114428520203, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 2.3912, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.032379924446842956, |
|
"eval_loss": 2.4828007221221924, |
|
"eval_runtime": 34.2086, |
|
"eval_samples_per_second": 5.7, |
|
"eval_steps_per_second": 2.865, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03453858607663249, |
|
"grad_norm": 0.5087575912475586, |
|
"learning_rate": 1.3090169943749475e-05, |
|
"loss": 2.6166, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.03669724770642202, |
|
"grad_norm": 0.6161304712295532, |
|
"learning_rate": 1.1045284632676535e-05, |
|
"loss": 2.3413, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.038855909336211546, |
|
"grad_norm": 0.8429335951805115, |
|
"learning_rate": 8.954715367323468e-06, |
|
"loss": 2.8171, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04101457096600108, |
|
"grad_norm": 0.46091142296791077, |
|
"learning_rate": 6.909830056250527e-06, |
|
"loss": 2.2982, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04317323259579061, |
|
"grad_norm": 0.5474252700805664, |
|
"learning_rate": 5.000000000000003e-06, |
|
"loss": 2.563, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04317323259579061, |
|
"eval_loss": 2.474599599838257, |
|
"eval_runtime": 34.2783, |
|
"eval_samples_per_second": 5.689, |
|
"eval_steps_per_second": 2.859, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04533189422558014, |
|
"grad_norm": 0.4824274480342865, |
|
"learning_rate": 3.308693936411421e-06, |
|
"loss": 2.461, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.04749055585536967, |
|
"grad_norm": 0.5804234147071838, |
|
"learning_rate": 1.9098300562505266e-06, |
|
"loss": 2.3954, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0496492174851592, |
|
"grad_norm": 0.4935641884803772, |
|
"learning_rate": 8.645454235739903e-07, |
|
"loss": 2.5672, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.05180787911494873, |
|
"grad_norm": 0.5770930051803589, |
|
"learning_rate": 2.1852399266194312e-07, |
|
"loss": 2.5701, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.053966540744738264, |
|
"grad_norm": 0.590212881565094, |
|
"learning_rate": 0.0, |
|
"loss": 2.4887, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.053966540744738264, |
|
"eval_loss": 2.4719741344451904, |
|
"eval_runtime": 33.3118, |
|
"eval_samples_per_second": 5.854, |
|
"eval_steps_per_second": 2.942, |
|
"step": 25 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 25, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 2, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7915510431744000.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|