llama-3.2-3B-lora-dummy-32 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.992,
"eval_steps": 5,
"global_step": 31,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.032,
"grad_norm": 0.06547047197818756,
"learning_rate": 2.5e-05,
"loss": 0.0571,
"step": 1
},
{
"epoch": 0.064,
"grad_norm": 0.06732479482889175,
"learning_rate": 5e-05,
"loss": 0.0552,
"step": 2
},
{
"epoch": 0.096,
"grad_norm": 0.061726897954940796,
"learning_rate": 7.500000000000001e-05,
"loss": 0.0524,
"step": 3
},
{
"epoch": 0.128,
"grad_norm": 0.05993999168276787,
"learning_rate": 0.0001,
"loss": 0.0527,
"step": 4
},
{
"epoch": 0.16,
"grad_norm": 0.04297811910510063,
"learning_rate": 9.966191788709716e-05,
"loss": 0.0452,
"step": 5
},
{
"epoch": 0.16,
"eval_loss": 0.04343939200043678,
"eval_runtime": 1.3959,
"eval_samples_per_second": 5.731,
"eval_steps_per_second": 1.433,
"step": 5
},
{
"epoch": 0.192,
"grad_norm": 0.03668003901839256,
"learning_rate": 9.865224352899119e-05,
"loss": 0.0387,
"step": 6
},
{
"epoch": 0.224,
"grad_norm": 0.04254748299717903,
"learning_rate": 9.698463103929542e-05,
"loss": 0.0373,
"step": 7
},
{
"epoch": 0.256,
"grad_norm": 0.03773832321166992,
"learning_rate": 9.468163201617062e-05,
"loss": 0.0353,
"step": 8
},
{
"epoch": 0.288,
"grad_norm": 0.03248106688261032,
"learning_rate": 9.177439057064683e-05,
"loss": 0.0379,
"step": 9
},
{
"epoch": 0.32,
"grad_norm": 0.029653361067175865,
"learning_rate": 8.83022221559489e-05,
"loss": 0.0349,
"step": 10
},
{
"epoch": 0.32,
"eval_loss": 0.03697124496102333,
"eval_runtime": 1.3747,
"eval_samples_per_second": 5.82,
"eval_steps_per_second": 1.455,
"step": 10
},
{
"epoch": 0.352,
"grad_norm": 0.02675388567149639,
"learning_rate": 8.43120818934367e-05,
"loss": 0.0324,
"step": 11
},
{
"epoch": 0.384,
"grad_norm": 0.02256765589118004,
"learning_rate": 7.985792958513931e-05,
"loss": 0.0334,
"step": 12
},
{
"epoch": 0.416,
"grad_norm": 0.021855270490050316,
"learning_rate": 7.500000000000001e-05,
"loss": 0.0313,
"step": 13
},
{
"epoch": 0.448,
"grad_norm": 0.019800638779997826,
"learning_rate": 6.980398830195785e-05,
"loss": 0.0262,
"step": 14
},
{
"epoch": 0.48,
"grad_norm": 0.020864585414528847,
"learning_rate": 6.434016163555452e-05,
"loss": 0.031,
"step": 15
},
{
"epoch": 0.48,
"eval_loss": 0.0329812727868557,
"eval_runtime": 1.3771,
"eval_samples_per_second": 5.809,
"eval_steps_per_second": 1.452,
"step": 15
},
{
"epoch": 0.512,
"grad_norm": 0.019576789811253548,
"learning_rate": 5.868240888334653e-05,
"loss": 0.0305,
"step": 16
},
{
"epoch": 0.544,
"grad_norm": 0.020052531734108925,
"learning_rate": 5.290724144552379e-05,
"loss": 0.0283,
"step": 17
},
{
"epoch": 0.576,
"grad_norm": 0.019002581015229225,
"learning_rate": 4.709275855447621e-05,
"loss": 0.0293,
"step": 18
},
{
"epoch": 0.608,
"grad_norm": 0.02045363001525402,
"learning_rate": 4.131759111665349e-05,
"loss": 0.0265,
"step": 19
},
{
"epoch": 0.64,
"grad_norm": 0.018427610397338867,
"learning_rate": 3.5659838364445505e-05,
"loss": 0.0258,
"step": 20
},
{
"epoch": 0.64,
"eval_loss": 0.031122902408242226,
"eval_runtime": 1.38,
"eval_samples_per_second": 5.797,
"eval_steps_per_second": 1.449,
"step": 20
},
{
"epoch": 0.672,
"grad_norm": 0.01990111917257309,
"learning_rate": 3.019601169804216e-05,
"loss": 0.0304,
"step": 21
},
{
"epoch": 0.704,
"grad_norm": 0.019229382276535034,
"learning_rate": 2.500000000000001e-05,
"loss": 0.0285,
"step": 22
},
{
"epoch": 0.736,
"grad_norm": 0.0193803608417511,
"learning_rate": 2.0142070414860704e-05,
"loss": 0.0306,
"step": 23
},
{
"epoch": 0.768,
"grad_norm": 0.01592605747282505,
"learning_rate": 1.5687918106563326e-05,
"loss": 0.027,
"step": 24
},
{
"epoch": 0.8,
"grad_norm": 0.01785469613969326,
"learning_rate": 1.1697777844051105e-05,
"loss": 0.0277,
"step": 25
},
{
"epoch": 0.8,
"eval_loss": 0.03004794754087925,
"eval_runtime": 1.3771,
"eval_samples_per_second": 5.81,
"eval_steps_per_second": 1.452,
"step": 25
},
{
"epoch": 0.832,
"grad_norm": 0.0174510907381773,
"learning_rate": 8.225609429353187e-06,
"loss": 0.0287,
"step": 26
},
{
"epoch": 0.864,
"grad_norm": 0.016350215300917625,
"learning_rate": 5.318367983829392e-06,
"loss": 0.0275,
"step": 27
},
{
"epoch": 0.896,
"grad_norm": 0.015370819717645645,
"learning_rate": 3.0153689607045845e-06,
"loss": 0.0238,
"step": 28
},
{
"epoch": 0.928,
"grad_norm": 0.016408780589699745,
"learning_rate": 1.3477564710088098e-06,
"loss": 0.0283,
"step": 29
},
{
"epoch": 0.96,
"grad_norm": 0.016846995800733566,
"learning_rate": 3.380821129028489e-07,
"loss": 0.0261,
"step": 30
},
{
"epoch": 0.96,
"eval_loss": 0.029837772250175476,
"eval_runtime": 1.3754,
"eval_samples_per_second": 5.817,
"eval_steps_per_second": 1.454,
"step": 30
},
{
"epoch": 0.992,
"grad_norm": 0.017581390216946602,
"learning_rate": 0.0,
"loss": 0.0283,
"step": 31
},
{
"epoch": 0.992,
"step": 31,
"total_flos": 5.244736447827149e+16,
"train_loss": 0.033818339808813984,
"train_runtime": 413.5784,
"train_samples_per_second": 2.418,
"train_steps_per_second": 0.075
}
],
"logging_steps": 1,
"max_steps": 31,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.244736447827149e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
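
For reference, a minimal sketch of how the log_history above can be loaded and summarized. This is not part of the saved trainer state; it assumes only the Python standard library and a local copy of the file named trainer_state.json.

import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training logs carry "loss", evaluation logs carry "eval_loss", and the final
# summary entry carries "train_loss" plus runtime statistics instead.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print("logged training steps:", len(train_logs))        # 31
print("evaluation points:", len(eval_logs))              # 6
print("final train loss:", train_logs[-1]["loss"])       # 0.0283
print("final eval loss:", eval_logs[-1]["eval_loss"])    # ~0.0298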