kostiantynk's picture
Training in progress, step 200, checkpoint
74571e8 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.3766478342749529,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0018832391713747645,
"eval_loss": 0.7600103616714478,
"eval_runtime": 7.022,
"eval_samples_per_second": 15.95,
"eval_steps_per_second": 7.975,
"step": 1
},
{
"epoch": 0.018832391713747645,
"grad_norm": 0.6724773645401001,
"learning_rate": 0.0002,
"loss": 0.6227,
"step": 10
},
{
"epoch": 0.03766478342749529,
"grad_norm": 0.43917495012283325,
"learning_rate": 0.0002,
"loss": 0.4686,
"step": 20
},
{
"epoch": 0.05649717514124294,
"grad_norm": 0.5041059851646423,
"learning_rate": 0.0002,
"loss": 0.465,
"step": 30
},
{
"epoch": 0.07532956685499058,
"grad_norm": 0.5057150721549988,
"learning_rate": 0.0002,
"loss": 0.4692,
"step": 40
},
{
"epoch": 0.09416195856873823,
"grad_norm": 0.3785208463668823,
"learning_rate": 0.0002,
"loss": 0.461,
"step": 50
},
{
"epoch": 0.09416195856873823,
"eval_loss": 0.4497997760772705,
"eval_runtime": 5.9882,
"eval_samples_per_second": 18.703,
"eval_steps_per_second": 9.352,
"step": 50
},
{
"epoch": 0.11299435028248588,
"grad_norm": 0.41503942012786865,
"learning_rate": 0.0002,
"loss": 0.3951,
"step": 60
},
{
"epoch": 0.1318267419962335,
"grad_norm": 0.4685976505279541,
"learning_rate": 0.0002,
"loss": 0.4604,
"step": 70
},
{
"epoch": 0.15065913370998116,
"grad_norm": 0.49280235171318054,
"learning_rate": 0.0002,
"loss": 0.4003,
"step": 80
},
{
"epoch": 0.1694915254237288,
"grad_norm": 0.35098376870155334,
"learning_rate": 0.0002,
"loss": 0.3999,
"step": 90
},
{
"epoch": 0.18832391713747645,
"grad_norm": 0.39404723048210144,
"learning_rate": 0.0002,
"loss": 0.4568,
"step": 100
},
{
"epoch": 0.18832391713747645,
"eval_loss": 0.4238744378089905,
"eval_runtime": 5.972,
"eval_samples_per_second": 18.754,
"eval_steps_per_second": 9.377,
"step": 100
},
{
"epoch": 0.2071563088512241,
"grad_norm": 0.5711905360221863,
"learning_rate": 0.0002,
"loss": 0.4179,
"step": 110
},
{
"epoch": 0.22598870056497175,
"grad_norm": 0.5030353665351868,
"learning_rate": 0.0002,
"loss": 0.4445,
"step": 120
},
{
"epoch": 0.2448210922787194,
"grad_norm": 0.407720685005188,
"learning_rate": 0.0002,
"loss": 0.4066,
"step": 130
},
{
"epoch": 0.263653483992467,
"grad_norm": 0.41843506693840027,
"learning_rate": 0.0002,
"loss": 0.4076,
"step": 140
},
{
"epoch": 0.2824858757062147,
"grad_norm": 0.6799523234367371,
"learning_rate": 0.0002,
"loss": 0.4233,
"step": 150
},
{
"epoch": 0.2824858757062147,
"eval_loss": 0.41013070940971375,
"eval_runtime": 5.9781,
"eval_samples_per_second": 18.735,
"eval_steps_per_second": 9.368,
"step": 150
},
{
"epoch": 0.3013182674199623,
"grad_norm": 0.4201280474662781,
"learning_rate": 0.0002,
"loss": 0.4012,
"step": 160
},
{
"epoch": 0.32015065913371,
"grad_norm": 0.47408103942871094,
"learning_rate": 0.0002,
"loss": 0.3687,
"step": 170
},
{
"epoch": 0.3389830508474576,
"grad_norm": 0.41946759819984436,
"learning_rate": 0.0002,
"loss": 0.4167,
"step": 180
},
{
"epoch": 0.3578154425612053,
"grad_norm": 0.43409135937690735,
"learning_rate": 0.0002,
"loss": 0.3949,
"step": 190
},
{
"epoch": 0.3766478342749529,
"grad_norm": 0.38029003143310547,
"learning_rate": 0.0002,
"loss": 0.4432,
"step": 200
},
{
"epoch": 0.3766478342749529,
"eval_loss": 0.3980959951877594,
"eval_runtime": 5.9981,
"eval_samples_per_second": 18.673,
"eval_steps_per_second": 9.336,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.065823967051776e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}