{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.16096579476861167,
  "eval_steps": 500,
  "global_step": 20,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008048289738430584,
      "grad_norm": Infinity,
      "learning_rate": 0.0001,
      "loss": 2.3834,
      "step": 1
    },
    {
      "epoch": 0.01609657947686117,
      "grad_norm": Infinity,
      "learning_rate": 0.0002,
      "loss": 2.3102,
      "step": 2
    },
    {
      "epoch": 0.02414486921529175,
      "grad_norm": Infinity,
      "learning_rate": 0.000199658449300667,
      "loss": 2.3726,
      "step": 3
    },
    {
      "epoch": 0.03219315895372234,
      "grad_norm": Infinity,
      "learning_rate": 0.00019863613034027224,
      "loss": 2.1816,
      "step": 4
    },
    {
      "epoch": 0.04024144869215292,
      "grad_norm": Infinity,
      "learning_rate": 0.00019694002659393305,
      "loss": 2.4038,
      "step": 5
    },
    {
      "epoch": 0.0482897384305835,
      "grad_norm": Infinity,
      "learning_rate": 0.00019458172417006347,
      "loss": 2.4681,
      "step": 6
    },
    {
      "epoch": 0.056338028169014086,
      "grad_norm": Infinity,
      "learning_rate": 0.00019157733266550575,
      "loss": 2.3357,
      "step": 7
    },
    {
      "epoch": 0.06438631790744467,
      "grad_norm": Infinity,
      "learning_rate": 0.0001879473751206489,
      "loss": 2.3554,
      "step": 8
    },
    {
      "epoch": 0.07243460764587525,
      "grad_norm": Infinity,
      "learning_rate": 0.00018371664782625287,
      "loss": 2.2722,
      "step": 9
    },
    {
      "epoch": 0.08048289738430583,
      "grad_norm": Infinity,
      "learning_rate": 0.00017891405093963938,
      "loss": 2.3479,
      "step": 10
    },
    {
      "epoch": 0.08853118712273642,
      "grad_norm": Infinity,
      "learning_rate": 0.00017357239106731317,
      "loss": 2.4537,
      "step": 11
    },
    {
      "epoch": 0.096579476861167,
      "grad_norm": Infinity,
      "learning_rate": 0.00016772815716257412,
      "loss": 2.4485,
      "step": 12
    },
    {
      "epoch": 0.10462776659959759,
      "grad_norm": Infinity,
      "learning_rate": 0.0001614212712689668,
      "loss": 2.4963,
      "step": 13
    },
    {
      "epoch": 0.11267605633802817,
      "grad_norm": Infinity,
      "learning_rate": 0.00015469481581224272,
      "loss": 2.3565,
      "step": 14
    },
    {
      "epoch": 0.12072434607645875,
      "grad_norm": Infinity,
      "learning_rate": 0.00014759473930370736,
      "loss": 2.38,
      "step": 15
    },
    {
      "epoch": 0.12877263581488935,
      "grad_norm": Infinity,
      "learning_rate": 0.00014016954246529696,
      "loss": 2.4191,
      "step": 16
    },
    {
      "epoch": 0.13682092555331993,
      "grad_norm": Infinity,
      "learning_rate": 0.00013246994692046836,
      "loss": 2.4504,
      "step": 17
    },
    {
      "epoch": 0.1448692152917505,
      "grad_norm": Infinity,
      "learning_rate": 0.00012454854871407994,
      "loss": 2.4143,
      "step": 18
    },
    {
      "epoch": 0.1529175050301811,
      "grad_norm": Infinity,
      "learning_rate": 0.00011645945902807341,
      "loss": 2.3919,
      "step": 19
    },
    {
      "epoch": 0.16096579476861167,
      "grad_norm": Infinity,
      "learning_rate": 0.00010825793454723325,
      "loss": 2.3508,
      "step": 20
    }
  ],
  "logging_steps": 1,
  "max_steps": 40,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 20,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.666832784162816e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}