|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.07663422484481569, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.007663422484481569, |
|
"grad_norm": 2411.532470703125, |
|
"learning_rate": 5.555555555555555e-07, |
|
"loss": 152.2018, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.015326844968963138, |
|
"grad_norm": 1508.88134765625, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 142.6181, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.022990267453444707, |
|
"grad_norm": 1164.76953125, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 123.6666, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.030653689937926276, |
|
"grad_norm": 1428.1673583984375, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 117.5532, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.038317112422407845, |
|
"grad_norm": 421.33551025390625, |
|
"learning_rate": 2.7777777777777783e-06, |
|
"loss": 113.8976, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.045980534906889414, |
|
"grad_norm": 302.48614501953125, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 106.6907, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05364395739137098, |
|
"grad_norm": 221.99728393554688, |
|
"learning_rate": 3.88888888888889e-06, |
|
"loss": 108.7462, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.06130737987585255, |
|
"grad_norm": 630.5855102539062, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 88.6301, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06897080236033412, |
|
"grad_norm": 749.811279296875, |
|
"learning_rate": 5e-06, |
|
"loss": 91.8458, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07663422484481569, |
|
"grad_norm": 168.46031188964844, |
|
"learning_rate": 4.938271604938272e-06, |
|
"loss": 94.57, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 9000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|