{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.3786909878253937,
      "learning_rate": 0.0002,
      "loss": 0.7736,
      "step": 250
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.49707138538360596,
      "learning_rate": 0.0002,
      "loss": 0.3865,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.5888916850090027,
      "learning_rate": 0.0002,
      "loss": 0.2454,
      "step": 750
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.49393707513809204,
      "learning_rate": 0.0002,
      "loss": 0.1871,
      "step": 1000
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 0.33486297726631165,
      "learning_rate": 0.0002,
      "loss": 0.1601,
      "step": 1250
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 0.4099717438220978,
      "learning_rate": 0.0002,
      "loss": 0.1414,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.028449026965504e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}