|
{ |
|
"best_metric": 0.17114591920857378, |
|
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2705", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 5410, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9242144177449169, |
|
"grad_norm": 1.4528056383132935, |
|
"learning_rate": 4.537892791127542e-05, |
|
"loss": 0.3191, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7762986255097697, |
|
"eval_f1": 0.15197789412449098, |
|
"eval_loss": 0.815065860748291, |
|
"eval_precision": 0.08251737207833228, |
|
"eval_recall": 0.9604779411764706, |
|
"eval_runtime": 14.2875, |
|
"eval_samples_per_second": 476.64, |
|
"eval_steps_per_second": 59.632, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.8484288354898335, |
|
"grad_norm": 1.5587466955184937, |
|
"learning_rate": 4.075785582255083e-05, |
|
"loss": 0.1619, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7900778557403161, |
|
"eval_f1": 0.168, |
|
"eval_loss": 0.833220362663269, |
|
"eval_precision": 0.09218777996774771, |
|
"eval_recall": 0.9457720588235294, |
|
"eval_runtime": 14.245, |
|
"eval_samples_per_second": 478.064, |
|
"eval_steps_per_second": 59.811, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 2.7726432532347505, |
|
"grad_norm": 0.9616082310676575, |
|
"learning_rate": 3.613678373382625e-05, |
|
"loss": 0.11, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7738201491205184, |
|
"eval_f1": 0.16426810845193607, |
|
"eval_loss": 1.1093989610671997, |
|
"eval_precision": 0.08991208982505004, |
|
"eval_recall": 0.9494485294117647, |
|
"eval_runtime": 14.3321, |
|
"eval_samples_per_second": 475.159, |
|
"eval_steps_per_second": 59.447, |
|
"step": 1623 |
|
}, |
|
{ |
|
"epoch": 3.6968576709796674, |
|
"grad_norm": 1.4018645286560059, |
|
"learning_rate": 3.1515711645101665e-05, |
|
"loss": 0.0764, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7740055199307949, |
|
"eval_f1": 0.16182605273514364, |
|
"eval_loss": 1.1206157207489014, |
|
"eval_precision": 0.08849100456227942, |
|
"eval_recall": 0.9448529411764706, |
|
"eval_runtime": 14.4251, |
|
"eval_samples_per_second": 472.092, |
|
"eval_steps_per_second": 59.064, |
|
"step": 2164 |
|
}, |
|
{ |
|
"epoch": 4.621072088724584, |
|
"grad_norm": 1.0904265642166138, |
|
"learning_rate": 2.6894639556377083e-05, |
|
"loss": 0.0567, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7890274211487498, |
|
"eval_f1": 0.17114591920857378, |
|
"eval_loss": 1.1805996894836426, |
|
"eval_precision": 0.09400470929179497, |
|
"eval_recall": 0.9540441176470589, |
|
"eval_runtime": 14.1983, |
|
"eval_samples_per_second": 479.634, |
|
"eval_steps_per_second": 60.007, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 5.545286506469501, |
|
"grad_norm": 1.240962028503418, |
|
"learning_rate": 2.2273567467652497e-05, |
|
"loss": 0.0428, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7827454103560493, |
|
"eval_f1": 0.1644558094933674, |
|
"eval_loss": 1.31381094455719, |
|
"eval_precision": 0.09005776299667426, |
|
"eval_recall": 0.9457720588235294, |
|
"eval_runtime": 14.1863, |
|
"eval_samples_per_second": 480.042, |
|
"eval_steps_per_second": 60.058, |
|
"step": 3246 |
|
}, |
|
{ |
|
"epoch": 6.469500924214418, |
|
"grad_norm": 0.44737720489501953, |
|
"learning_rate": 1.7652495378927914e-05, |
|
"loss": 0.0332, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7874071429552226, |
|
"eval_f1": 0.16785861076859843, |
|
"eval_loss": 1.4008890390396118, |
|
"eval_precision": 0.09217297102103457, |
|
"eval_recall": 0.9384191176470589, |
|
"eval_runtime": 14.2845, |
|
"eval_samples_per_second": 476.742, |
|
"eval_steps_per_second": 59.645, |
|
"step": 3787 |
|
}, |
|
{ |
|
"epoch": 7.393715341959335, |
|
"grad_norm": 0.7116318941116333, |
|
"learning_rate": 1.3031423290203328e-05, |
|
"loss": 0.0257, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7790997844206132, |
|
"eval_f1": 0.16497502819397453, |
|
"eval_loss": 1.5610512495040894, |
|
"eval_precision": 0.0904114426982165, |
|
"eval_recall": 0.9411764705882353, |
|
"eval_runtime": 14.5465, |
|
"eval_samples_per_second": 468.153, |
|
"eval_steps_per_second": 58.571, |
|
"step": 4328 |
|
}, |
|
{ |
|
"epoch": 8.317929759704251, |
|
"grad_norm": 0.9520462155342102, |
|
"learning_rate": 8.410351201478742e-06, |
|
"loss": 0.022, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7835212215249838, |
|
"eval_f1": 0.16788916055419725, |
|
"eval_loss": 1.5933887958526611, |
|
"eval_precision": 0.09211232337685567, |
|
"eval_recall": 0.9466911764705882, |
|
"eval_runtime": 14.1983, |
|
"eval_samples_per_second": 479.635, |
|
"eval_steps_per_second": 60.007, |
|
"step": 4869 |
|
}, |
|
{ |
|
"epoch": 9.242144177449168, |
|
"grad_norm": 0.6707109212875366, |
|
"learning_rate": 3.789279112754159e-06, |
|
"loss": 0.0181, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7862743213368668, |
|
"eval_f1": 0.16804385175488834, |
|
"eval_loss": 1.6103088855743408, |
|
"eval_precision": 0.09223170184104176, |
|
"eval_recall": 0.9439338235294118, |
|
"eval_runtime": 14.5612, |
|
"eval_samples_per_second": 467.681, |
|
"eval_steps_per_second": 58.512, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 5410, |
|
"total_flos": 1.7176580067661056e+16, |
|
"train_loss": 0.0812657987344287, |
|
"train_runtime": 1542.5562, |
|
"train_samples_per_second": 224.329, |
|
"train_steps_per_second": 3.507 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5410, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7176580067661056e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|