Rodrigo1771's picture
End of training
e85d1ad verified
raw
history blame
6.49 kB
{
"best_metric": 0.17114591920857378,
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2705",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 5410,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9242144177449169,
"grad_norm": 1.4528056383132935,
"learning_rate": 4.537892791127542e-05,
"loss": 0.3191,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.7762986255097697,
"eval_f1": 0.15197789412449098,
"eval_loss": 0.815065860748291,
"eval_precision": 0.08251737207833228,
"eval_recall": 0.9604779411764706,
"eval_runtime": 14.2875,
"eval_samples_per_second": 476.64,
"eval_steps_per_second": 59.632,
"step": 541
},
{
"epoch": 1.8484288354898335,
"grad_norm": 1.5587466955184937,
"learning_rate": 4.075785582255083e-05,
"loss": 0.1619,
"step": 1000
},
{
"epoch": 2.0,
"eval_accuracy": 0.7900778557403161,
"eval_f1": 0.168,
"eval_loss": 0.833220362663269,
"eval_precision": 0.09218777996774771,
"eval_recall": 0.9457720588235294,
"eval_runtime": 14.245,
"eval_samples_per_second": 478.064,
"eval_steps_per_second": 59.811,
"step": 1082
},
{
"epoch": 2.7726432532347505,
"grad_norm": 0.9616082310676575,
"learning_rate": 3.613678373382625e-05,
"loss": 0.11,
"step": 1500
},
{
"epoch": 3.0,
"eval_accuracy": 0.7738201491205184,
"eval_f1": 0.16426810845193607,
"eval_loss": 1.1093989610671997,
"eval_precision": 0.08991208982505004,
"eval_recall": 0.9494485294117647,
"eval_runtime": 14.3321,
"eval_samples_per_second": 475.159,
"eval_steps_per_second": 59.447,
"step": 1623
},
{
"epoch": 3.6968576709796674,
"grad_norm": 1.4018645286560059,
"learning_rate": 3.1515711645101665e-05,
"loss": 0.0764,
"step": 2000
},
{
"epoch": 4.0,
"eval_accuracy": 0.7740055199307949,
"eval_f1": 0.16182605273514364,
"eval_loss": 1.1206157207489014,
"eval_precision": 0.08849100456227942,
"eval_recall": 0.9448529411764706,
"eval_runtime": 14.4251,
"eval_samples_per_second": 472.092,
"eval_steps_per_second": 59.064,
"step": 2164
},
{
"epoch": 4.621072088724584,
"grad_norm": 1.0904265642166138,
"learning_rate": 2.6894639556377083e-05,
"loss": 0.0567,
"step": 2500
},
{
"epoch": 5.0,
"eval_accuracy": 0.7890274211487498,
"eval_f1": 0.17114591920857378,
"eval_loss": 1.1805996894836426,
"eval_precision": 0.09400470929179497,
"eval_recall": 0.9540441176470589,
"eval_runtime": 14.1983,
"eval_samples_per_second": 479.634,
"eval_steps_per_second": 60.007,
"step": 2705
},
{
"epoch": 5.545286506469501,
"grad_norm": 1.240962028503418,
"learning_rate": 2.2273567467652497e-05,
"loss": 0.0428,
"step": 3000
},
{
"epoch": 6.0,
"eval_accuracy": 0.7827454103560493,
"eval_f1": 0.1644558094933674,
"eval_loss": 1.31381094455719,
"eval_precision": 0.09005776299667426,
"eval_recall": 0.9457720588235294,
"eval_runtime": 14.1863,
"eval_samples_per_second": 480.042,
"eval_steps_per_second": 60.058,
"step": 3246
},
{
"epoch": 6.469500924214418,
"grad_norm": 0.44737720489501953,
"learning_rate": 1.7652495378927914e-05,
"loss": 0.0332,
"step": 3500
},
{
"epoch": 7.0,
"eval_accuracy": 0.7874071429552226,
"eval_f1": 0.16785861076859843,
"eval_loss": 1.4008890390396118,
"eval_precision": 0.09217297102103457,
"eval_recall": 0.9384191176470589,
"eval_runtime": 14.2845,
"eval_samples_per_second": 476.742,
"eval_steps_per_second": 59.645,
"step": 3787
},
{
"epoch": 7.393715341959335,
"grad_norm": 0.7116318941116333,
"learning_rate": 1.3031423290203328e-05,
"loss": 0.0257,
"step": 4000
},
{
"epoch": 8.0,
"eval_accuracy": 0.7790997844206132,
"eval_f1": 0.16497502819397453,
"eval_loss": 1.5610512495040894,
"eval_precision": 0.0904114426982165,
"eval_recall": 0.9411764705882353,
"eval_runtime": 14.5465,
"eval_samples_per_second": 468.153,
"eval_steps_per_second": 58.571,
"step": 4328
},
{
"epoch": 8.317929759704251,
"grad_norm": 0.9520462155342102,
"learning_rate": 8.410351201478742e-06,
"loss": 0.022,
"step": 4500
},
{
"epoch": 9.0,
"eval_accuracy": 0.7835212215249838,
"eval_f1": 0.16788916055419725,
"eval_loss": 1.5933887958526611,
"eval_precision": 0.09211232337685567,
"eval_recall": 0.9466911764705882,
"eval_runtime": 14.1983,
"eval_samples_per_second": 479.635,
"eval_steps_per_second": 60.007,
"step": 4869
},
{
"epoch": 9.242144177449168,
"grad_norm": 0.6707109212875366,
"learning_rate": 3.789279112754159e-06,
"loss": 0.0181,
"step": 5000
},
{
"epoch": 10.0,
"eval_accuracy": 0.7862743213368668,
"eval_f1": 0.16804385175488834,
"eval_loss": 1.6103088855743408,
"eval_precision": 0.09223170184104176,
"eval_recall": 0.9439338235294118,
"eval_runtime": 14.5612,
"eval_samples_per_second": 467.681,
"eval_steps_per_second": 58.512,
"step": 5410
},
{
"epoch": 10.0,
"step": 5410,
"total_flos": 1.7176580067661056e+16,
"train_loss": 0.0812657987344287,
"train_runtime": 1542.5562,
"train_samples_per_second": 224.329,
"train_steps_per_second": 3.507
}
],
"logging_steps": 500,
"max_steps": 5410,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.7176580067661056e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}