|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.3766478342749529, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0018832391713747645, |
|
"eval_loss": 0.7600103616714478, |
|
"eval_runtime": 7.022, |
|
"eval_samples_per_second": 15.95, |
|
"eval_steps_per_second": 7.975, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.018832391713747645, |
|
"grad_norm": 0.6724773645401001, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6227, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03766478342749529, |
|
"grad_norm": 0.43917495012283325, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4686, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05649717514124294, |
|
"grad_norm": 0.5041059851646423, |
|
"learning_rate": 0.0002, |
|
"loss": 0.465, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07532956685499058, |
|
"grad_norm": 0.5057150721549988, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4692, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09416195856873823, |
|
"grad_norm": 0.3785208463668823, |
|
"learning_rate": 0.0002, |
|
"loss": 0.461, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09416195856873823, |
|
"eval_loss": 0.4497997760772705, |
|
"eval_runtime": 5.9882, |
|
"eval_samples_per_second": 18.703, |
|
"eval_steps_per_second": 9.352, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11299435028248588, |
|
"grad_norm": 0.41503942012786865, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3951, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1318267419962335, |
|
"grad_norm": 0.4685976505279541, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4604, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15065913370998116, |
|
"grad_norm": 0.49280235171318054, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4003, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1694915254237288, |
|
"grad_norm": 0.35098376870155334, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3999, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.18832391713747645, |
|
"grad_norm": 0.39404723048210144, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4568, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18832391713747645, |
|
"eval_loss": 0.4238744378089905, |
|
"eval_runtime": 5.972, |
|
"eval_samples_per_second": 18.754, |
|
"eval_steps_per_second": 9.377, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2071563088512241, |
|
"grad_norm": 0.5711905360221863, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4179, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.22598870056497175, |
|
"grad_norm": 0.5030353665351868, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4445, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2448210922787194, |
|
"grad_norm": 0.407720685005188, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4066, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.263653483992467, |
|
"grad_norm": 0.41843506693840027, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4076, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2824858757062147, |
|
"grad_norm": 0.6799523234367371, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4233, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2824858757062147, |
|
"eval_loss": 0.41013070940971375, |
|
"eval_runtime": 5.9781, |
|
"eval_samples_per_second": 18.735, |
|
"eval_steps_per_second": 9.368, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3013182674199623, |
|
"grad_norm": 0.4201280474662781, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4012, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.32015065913371, |
|
"grad_norm": 0.47408103942871094, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3687, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3389830508474576, |
|
"grad_norm": 0.41946759819984436, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4167, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3578154425612053, |
|
"grad_norm": 0.43409135937690735, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3949, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3766478342749529, |
|
"grad_norm": 0.38029003143310547, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4432, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3766478342749529, |
|
"eval_loss": 0.3980959951877594, |
|
"eval_runtime": 5.9981, |
|
"eval_samples_per_second": 18.673, |
|
"eval_steps_per_second": 9.336, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.065823967051776e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|