adapters-opt-bf16-QLORA-super_glue-axg
/
trainer_state-opt-fp16-QLORA-super_glue-axg-sequence_classification.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 8.88888888888889, | |
"eval_steps": 1, | |
"global_step": 40, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.2222222222222222, | |
"grad_norm": 2.578125, | |
"learning_rate": 2.5e-05, | |
"loss": 0.663, | |
"step": 1 | |
}, | |
{ | |
"epoch": 0.2222222222222222, | |
"eval_accuracy": 0.6388888888888888, | |
"eval_loss": 0.6485392451286316, | |
"eval_runtime": 0.6135, | |
"eval_samples_per_second": 117.366, | |
"eval_steps_per_second": 8.15, | |
"step": 1 | |
}, | |
{ | |
"epoch": 0.4444444444444444, | |
"grad_norm": 8.875, | |
"learning_rate": 5e-05, | |
"loss": 0.756, | |
"step": 2 | |
}, | |
{ | |
"epoch": 0.4444444444444444, | |
"eval_accuracy": 0.6111111111111112, | |
"eval_loss": 0.6574299931526184, | |
"eval_runtime": 0.6097, | |
"eval_samples_per_second": 118.09, | |
"eval_steps_per_second": 8.201, | |
"step": 2 | |
}, | |
{ | |
"epoch": 0.6666666666666666, | |
"grad_norm": 4.46875, | |
"learning_rate": 4.868421052631579e-05, | |
"loss": 0.7238, | |
"step": 3 | |
}, | |
{ | |
"epoch": 0.6666666666666666, | |
"eval_accuracy": 0.5277777777777778, | |
"eval_loss": 0.7034233808517456, | |
"eval_runtime": 0.608, | |
"eval_samples_per_second": 118.427, | |
"eval_steps_per_second": 8.224, | |
"step": 3 | |
}, | |
{ | |
"epoch": 0.8888888888888888, | |
"grad_norm": 13.25, | |
"learning_rate": 4.736842105263158e-05, | |
"loss": 0.7122, | |
"step": 4 | |
}, | |
{ | |
"epoch": 0.8888888888888888, | |
"eval_accuracy": 0.5, | |
"eval_loss": 0.7725694179534912, | |
"eval_runtime": 0.6086, | |
"eval_samples_per_second": 118.309, | |
"eval_steps_per_second": 8.216, | |
"step": 4 | |
}, | |
{ | |
"epoch": 1.1111111111111112, | |
"grad_norm": 9.0, | |
"learning_rate": 4.605263157894737e-05, | |
"loss": 0.7642, | |
"step": 5 | |
}, | |
{ | |
"epoch": 1.1111111111111112, | |
"eval_accuracy": 0.4583333333333333, | |
"eval_loss": 0.8437364101409912, | |
"eval_runtime": 0.6086, | |
"eval_samples_per_second": 118.308, | |
"eval_steps_per_second": 8.216, | |
"step": 5 | |
}, | |
{ | |
"epoch": 1.3333333333333333, | |
"grad_norm": 11.5, | |
"learning_rate": 4.473684210526316e-05, | |
"loss": 0.7398, | |
"step": 6 | |
}, | |
{ | |
"epoch": 1.3333333333333333, | |
"eval_accuracy": 0.4722222222222222, | |
"eval_loss": 0.8946533203125, | |
"eval_runtime": 0.6095, | |
"eval_samples_per_second": 118.131, | |
"eval_steps_per_second": 8.204, | |
"step": 6 | |
}, | |
{ | |
"epoch": 1.5555555555555556, | |
"grad_norm": 7.34375, | |
"learning_rate": 4.342105263157895e-05, | |
"loss": 0.6496, | |
"step": 7 | |
}, | |
{ | |
"epoch": 1.5555555555555556, | |
"eval_accuracy": 0.4583333333333333, | |
"eval_loss": 0.9147610068321228, | |
"eval_runtime": 0.6082, | |
"eval_samples_per_second": 118.375, | |
"eval_steps_per_second": 8.22, | |
"step": 7 | |
}, | |
{ | |
"epoch": 1.7777777777777777, | |
"grad_norm": 8.9375, | |
"learning_rate": 4.210526315789474e-05, | |
"loss": 0.8145, | |
"step": 8 | |
}, | |
{ | |
"epoch": 1.7777777777777777, | |
"eval_accuracy": 0.4722222222222222, | |
"eval_loss": 0.8886311650276184, | |
"eval_runtime": 0.6076, | |
"eval_samples_per_second": 118.496, | |
"eval_steps_per_second": 8.229, | |
"step": 8 | |
}, | |
{ | |
"epoch": 2.0, | |
"grad_norm": 3.578125, | |
"learning_rate": 4.078947368421053e-05, | |
"loss": 0.7133, | |
"step": 9 | |
}, | |
{ | |
"epoch": 2.0, | |
"eval_accuracy": 0.4583333333333333, | |
"eval_loss": 0.8463134765625, | |
"eval_runtime": 0.6081, | |
"eval_samples_per_second": 118.399, | |
"eval_steps_per_second": 8.222, | |
"step": 9 | |
}, | |
{ | |
"epoch": 2.2222222222222223, | |
"grad_norm": 4.9375, | |
"learning_rate": 3.9473684210526316e-05, | |
"loss": 0.7018, | |
"step": 10 | |
}, | |
{ | |
"epoch": 2.2222222222222223, | |
"eval_accuracy": 0.4861111111111111, | |
"eval_loss": 0.7938368320465088, | |
"eval_runtime": 0.6078, | |
"eval_samples_per_second": 118.455, | |
"eval_steps_per_second": 8.226, | |
"step": 10 | |
}, | |
{ | |
"epoch": 2.4444444444444446, | |
"grad_norm": 4.96875, | |
"learning_rate": 3.815789473684211e-05, | |
"loss": 0.7119, | |
"step": 11 | |
}, | |
{ | |
"epoch": 2.4444444444444446, | |
"eval_accuracy": 0.4861111111111111, | |
"eval_loss": 0.7457139492034912, | |
"eval_runtime": 0.6077, | |
"eval_samples_per_second": 118.489, | |
"eval_steps_per_second": 8.228, | |
"step": 11 | |
}, | |
{ | |
"epoch": 2.6666666666666665, | |
"grad_norm": 10.1875, | |
"learning_rate": 3.6842105263157895e-05, | |
"loss": 0.7619, | |
"step": 12 | |
}, | |
{ | |
"epoch": 2.6666666666666665, | |
"eval_accuracy": 0.5, | |
"eval_loss": 0.7044270634651184, | |
"eval_runtime": 0.608, | |
"eval_samples_per_second": 118.417, | |
"eval_steps_per_second": 8.223, | |
"step": 12 | |
}, | |
{ | |
"epoch": 2.888888888888889, | |
"grad_norm": 2.6875, | |
"learning_rate": 3.5526315789473684e-05, | |
"loss": 0.7001, | |
"step": 13 | |
}, | |
{ | |
"epoch": 2.888888888888889, | |
"eval_accuracy": 0.6111111111111112, | |
"eval_loss": 0.6843125820159912, | |
"eval_runtime": 0.608, | |
"eval_samples_per_second": 118.411, | |
"eval_steps_per_second": 8.223, | |
"step": 13 | |
}, | |
{ | |
"epoch": 3.111111111111111, | |
"grad_norm": 4.90625, | |
"learning_rate": 3.421052631578947e-05, | |
"loss": 0.7011, | |
"step": 14 | |
}, | |
{ | |
"epoch": 3.111111111111111, | |
"eval_accuracy": 0.6111111111111112, | |
"eval_loss": 0.6841227412223816, | |
"eval_runtime": 0.6072, | |
"eval_samples_per_second": 118.583, | |
"eval_steps_per_second": 8.235, | |
"step": 14 | |
}, | |
{ | |
"epoch": 3.3333333333333335, | |
"grad_norm": 7.15625, | |
"learning_rate": 3.289473684210527e-05, | |
"loss": 0.6936, | |
"step": 15 | |
}, | |
{ | |
"epoch": 3.3333333333333335, | |
"eval_accuracy": 0.6527777777777778, | |
"eval_loss": 0.6861708164215088, | |
"eval_runtime": 0.6089, | |
"eval_samples_per_second": 118.237, | |
"eval_steps_per_second": 8.211, | |
"step": 15 | |
}, | |
{ | |
"epoch": 3.5555555555555554, | |
"grad_norm": 3.125, | |
"learning_rate": 3.157894736842105e-05, | |
"loss": 0.6712, | |
"step": 16 | |
}, | |
{ | |
"epoch": 3.5555555555555554, | |
"eval_accuracy": 0.6388888888888888, | |
"eval_loss": 0.689697265625, | |
"eval_runtime": 0.6077, | |
"eval_samples_per_second": 118.487, | |
"eval_steps_per_second": 8.228, | |
"step": 16 | |
}, | |
{ | |
"epoch": 3.7777777777777777, | |
"grad_norm": 3.8125, | |
"learning_rate": 3.0263157894736844e-05, | |
"loss": 0.7103, | |
"step": 17 | |
}, | |
{ | |
"epoch": 3.7777777777777777, | |
"eval_accuracy": 0.6111111111111112, | |
"eval_loss": 0.6951768398284912, | |
"eval_runtime": 0.6084, | |
"eval_samples_per_second": 118.35, | |
"eval_steps_per_second": 8.219, | |
"step": 17 | |
}, | |
{ | |
"epoch": 4.0, | |
"grad_norm": 6.03125, | |
"learning_rate": 2.8947368421052634e-05, | |
"loss": 0.7363, | |
"step": 18 | |
}, | |
{ | |
"epoch": 4.0, | |
"eval_accuracy": 0.5972222222222222, | |
"eval_loss": 0.7005886435508728, | |
"eval_runtime": 0.6084, | |
"eval_samples_per_second": 118.351, | |
"eval_steps_per_second": 8.219, | |
"step": 18 | |
}, | |
{ | |
"epoch": 4.222222222222222, | |
"grad_norm": 11.1875, | |
"learning_rate": 2.7631578947368426e-05, | |
"loss": 0.6435, | |
"step": 19 | |
}, | |
{ | |
"epoch": 4.222222222222222, | |
"eval_accuracy": 0.5833333333333334, | |
"eval_loss": 0.7067057490348816, | |
"eval_runtime": 0.6096, | |
"eval_samples_per_second": 118.111, | |
"eval_steps_per_second": 8.202, | |
"step": 19 | |
}, | |
{ | |
"epoch": 4.444444444444445, | |
"grad_norm": 4.65625, | |
"learning_rate": 2.6315789473684212e-05, | |
"loss": 0.6981, | |
"step": 20 | |
}, | |
{ | |
"epoch": 4.444444444444445, | |
"eval_accuracy": 0.5833333333333334, | |
"eval_loss": 0.7121039628982544, | |
"eval_runtime": 0.6082, | |
"eval_samples_per_second": 118.379, | |
"eval_steps_per_second": 8.221, | |
"step": 20 | |
}, | |
{ | |
"epoch": 4.666666666666667, | |
"grad_norm": 2.703125, | |
"learning_rate": 2.5e-05, | |
"loss": 0.6926, | |
"step": 21 | |
}, | |
{ | |
"epoch": 4.666666666666667, | |
"eval_accuracy": 0.5416666666666666, | |
"eval_loss": 0.7211371660232544, | |
"eval_runtime": 0.6081, | |
"eval_samples_per_second": 118.398, | |
"eval_steps_per_second": 8.222, | |
"step": 21 | |
}, | |
{ | |
"epoch": 4.888888888888889, | |
"grad_norm": 5.15625, | |
"learning_rate": 2.368421052631579e-05, | |
"loss": 0.7108, | |
"step": 22 | |
}, | |
{ | |
"epoch": 4.888888888888889, | |
"eval_accuracy": 0.5, | |
"eval_loss": 0.73095703125, | |
"eval_runtime": 0.6083, | |
"eval_samples_per_second": 118.372, | |
"eval_steps_per_second": 8.22, | |
"step": 22 | |
}, | |
{ | |
"epoch": 5.111111111111111, | |
"grad_norm": 5.15625, | |
"learning_rate": 2.236842105263158e-05, | |
"loss": 0.672, | |
"step": 23 | |
}, | |
{ | |
"epoch": 5.111111111111111, | |
"eval_accuracy": 0.4861111111111111, | |
"eval_loss": 0.7393120527267456, | |
"eval_runtime": 0.6079, | |
"eval_samples_per_second": 118.437, | |
"eval_steps_per_second": 8.225, | |
"step": 23 | |
}, | |
{ | |
"epoch": 5.333333333333333, | |
"grad_norm": 3.140625, | |
"learning_rate": 2.105263157894737e-05, | |
"loss": 0.7141, | |
"step": 24 | |
}, | |
{ | |
"epoch": 5.333333333333333, | |
"eval_accuracy": 0.4722222222222222, | |
"eval_loss": 0.7498372197151184, | |
"eval_runtime": 0.6082, | |
"eval_samples_per_second": 118.379, | |
"eval_steps_per_second": 8.221, | |
"step": 24 | |
}, | |
{ | |
"epoch": 5.555555555555555, | |
"grad_norm": 2.359375, | |
"learning_rate": 1.9736842105263158e-05, | |
"loss": 0.6683, | |
"step": 25 | |
}, | |
{ | |
"epoch": 5.555555555555555, | |
"eval_accuracy": 0.4861111111111111, | |
"eval_loss": 0.7597113847732544, | |
"eval_runtime": 0.6096, | |
"eval_samples_per_second": 118.114, | |
"eval_steps_per_second": 8.202, | |
"step": 25 | |
}, | |
{ | |
"epoch": 5.777777777777778, | |
"grad_norm": 10.5625, | |
"learning_rate": 1.8421052631578947e-05, | |
"loss": 0.7034, | |
"step": 26 | |
}, | |
{ | |
"epoch": 5.777777777777778, | |
"eval_accuracy": 0.4722222222222222, | |
"eval_loss": 0.7664387822151184, | |
"eval_runtime": 0.6087, | |
"eval_samples_per_second": 118.28, | |
"eval_steps_per_second": 8.214, | |
"step": 26 | |
}, | |
{ | |
"epoch": 6.0, | |
"grad_norm": 10.0625, | |
"learning_rate": 1.7105263157894737e-05, | |
"loss": 0.692, | |
"step": 27 | |
}, | |
{ | |
"epoch": 6.0, | |
"eval_accuracy": 0.4722222222222222, | |
"eval_loss": 0.7706434726715088, | |
"eval_runtime": 0.6079, | |
"eval_samples_per_second": 118.431, | |
"eval_steps_per_second": 8.224, | |
"step": 27 | |
}, | |
{ | |
"epoch": 6.222222222222222, | |
"grad_norm": 2.578125, | |
"learning_rate": 1.5789473684210526e-05, | |
"loss": 0.6832, | |
"step": 28 | |
}, | |
{ | |
"epoch": 6.222222222222222, | |
"eval_accuracy": 0.4722222222222222, | |
"eval_loss": 0.7749837040901184, | |
"eval_runtime": 0.6081, | |
"eval_samples_per_second": 118.408, | |
"eval_steps_per_second": 8.223, | |
"step": 28 | |
}, | |
{ | |
"epoch": 6.444444444444445, | |
"grad_norm": 4.34375, | |
"learning_rate": 1.4473684210526317e-05, | |
"loss": 0.7021, | |
"step": 29 | |
}, | |
{ | |
"epoch": 6.444444444444445, | |
"eval_accuracy": 0.4583333333333333, | |
"eval_loss": 0.7776963710784912, | |
"eval_runtime": 0.6079, | |
"eval_samples_per_second": 118.436, | |
"eval_steps_per_second": 8.225, | |
"step": 29 | |
}, | |
{ | |
"epoch": 6.666666666666667, | |
"grad_norm": 2.625, | |
"learning_rate": 1.3157894736842106e-05, | |
"loss": 0.6819, | |
"step": 30 | |
}, | |
{ | |
"epoch": 6.666666666666667, | |
"eval_accuracy": 0.4722222222222222, | |
"eval_loss": 0.7759331464767456, | |
"eval_runtime": 0.6086, | |
"eval_samples_per_second": 118.3, | |
"eval_steps_per_second": 8.215, | |
"step": 30 | |
}, | |
{ | |
"epoch": 6.888888888888889, | |
"grad_norm": 3.53125, | |
"learning_rate": 1.1842105263157895e-05, | |
"loss": 0.6352, | |
"step": 31 | |
}, | |
{ | |
"epoch": 6.888888888888889, | |
"eval_accuracy": 0.4583333333333333, | |
"eval_loss": 0.7765842080116272, | |
"eval_runtime": 0.6091, | |
"eval_samples_per_second": 118.211, | |
"eval_steps_per_second": 8.209, | |
"step": 31 | |
}, | |
{ | |
"epoch": 7.111111111111111, | |
"grad_norm": 2.46875, | |
"learning_rate": 1.0526315789473684e-05, | |
"loss": 0.6888, | |
"step": 32 | |
}, | |
{ | |
"epoch": 7.111111111111111, | |
"eval_accuracy": 0.4722222222222222, | |
"eval_loss": 0.7748752236366272, | |
"eval_runtime": 0.6086, | |
"eval_samples_per_second": 118.309, | |
"eval_steps_per_second": 8.216, | |
"step": 32 | |
}, | |
{ | |
"epoch": 7.333333333333333, | |
"grad_norm": 3.375, | |
"learning_rate": 9.210526315789474e-06, | |
"loss": 0.6618, | |
"step": 33 | |
}, | |
{ | |
"epoch": 7.333333333333333, | |
"eval_accuracy": 0.5, | |
"eval_loss": 0.7731662392616272, | |
"eval_runtime": 0.6075, | |
"eval_samples_per_second": 118.515, | |
"eval_steps_per_second": 8.23, | |
"step": 33 | |
}, | |
{ | |
"epoch": 7.555555555555555, | |
"grad_norm": 6.65625, | |
"learning_rate": 7.894736842105263e-06, | |
"loss": 0.6754, | |
"step": 34 | |
}, | |
{ | |
"epoch": 7.555555555555555, | |
"eval_accuracy": 0.5138888888888888, | |
"eval_loss": 0.7746039628982544, | |
"eval_runtime": 0.6088, | |
"eval_samples_per_second": 118.274, | |
"eval_steps_per_second": 8.214, | |
"step": 34 | |
}, | |
{ | |
"epoch": 7.777777777777778, | |
"grad_norm": 6.375, | |
"learning_rate": 6.578947368421053e-06, | |
"loss": 0.6597, | |
"step": 35 | |
}, | |
{ | |
"epoch": 7.777777777777778, | |
"eval_accuracy": 0.5138888888888888, | |
"eval_loss": 0.7731662392616272, | |
"eval_runtime": 0.6081, | |
"eval_samples_per_second": 118.392, | |
"eval_steps_per_second": 8.222, | |
"step": 35 | |
}, | |
{ | |
"epoch": 8.0, | |
"grad_norm": 5.75, | |
"learning_rate": 5.263157894736842e-06, | |
"loss": 0.6567, | |
"step": 36 | |
}, | |
{ | |
"epoch": 8.0, | |
"eval_accuracy": 0.5138888888888888, | |
"eval_loss": 0.7737358808517456, | |
"eval_runtime": 0.6081, | |
"eval_samples_per_second": 118.395, | |
"eval_steps_per_second": 8.222, | |
"step": 36 | |
}, | |
{ | |
"epoch": 8.222222222222221, | |
"grad_norm": 4.90625, | |
"learning_rate": 3.9473684210526315e-06, | |
"loss": 0.6282, | |
"step": 37 | |
}, | |
{ | |
"epoch": 8.222222222222221, | |
"eval_accuracy": 0.5138888888888888, | |
"eval_loss": 0.7725694179534912, | |
"eval_runtime": 0.6079, | |
"eval_samples_per_second": 118.436, | |
"eval_steps_per_second": 8.225, | |
"step": 37 | |
}, | |
{ | |
"epoch": 8.444444444444445, | |
"grad_norm": 7.71875, | |
"learning_rate": 2.631578947368421e-06, | |
"loss": 0.6826, | |
"step": 38 | |
}, | |
{ | |
"epoch": 8.444444444444445, | |
"eval_accuracy": 0.5138888888888888, | |
"eval_loss": 0.7727593183517456, | |
"eval_runtime": 0.6088, | |
"eval_samples_per_second": 118.264, | |
"eval_steps_per_second": 8.213, | |
"step": 38 | |
}, | |
{ | |
"epoch": 8.666666666666666, | |
"grad_norm": 4.53125, | |
"learning_rate": 1.3157894736842106e-06, | |
"loss": 0.6556, | |
"step": 39 | |
}, | |
{ | |
"epoch": 8.666666666666666, | |
"eval_accuracy": 0.5138888888888888, | |
"eval_loss": 0.7752007246017456, | |
"eval_runtime": 0.6082, | |
"eval_samples_per_second": 118.385, | |
"eval_steps_per_second": 8.221, | |
"step": 39 | |
}, | |
{ | |
"epoch": 8.88888888888889, | |
"grad_norm": 3.796875, | |
"learning_rate": 0.0, | |
"loss": 0.6745, | |
"step": 40 | |
}, | |
{ | |
"epoch": 8.88888888888889, | |
"eval_accuracy": 0.5138888888888888, | |
"eval_loss": 0.7738173007965088, | |
"eval_runtime": 0.6087, | |
"eval_samples_per_second": 118.294, | |
"eval_steps_per_second": 8.215, | |
"step": 40 | |
}, | |
{ | |
"epoch": 8.88888888888889, | |
"step": 40, | |
"total_flos": 3923745857077248.0, | |
"train_loss": 0.695129108428955, | |
"train_runtime": 77.1509, | |
"train_samples_per_second": 36.811, | |
"train_steps_per_second": 0.518 | |
} | |
], | |
"logging_steps": 1, | |
"max_steps": 40, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 10, | |
"save_steps": 500, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": false, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 3923745857077248.0, | |
"train_batch_size": 4, | |
"trial_name": null, | |
"trial_params": null | |
} | |