|
{ |
|
"best_metric": 0.18809613585472107, |
|
"best_model_checkpoint": "autotrain-28eqp-t2e16/checkpoint-150", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.269129753112793, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.6984, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 4.074235916137695, |
|
"learning_rate": 4.962962962962963e-05, |
|
"loss": 0.6593, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 5.697342395782471, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.4673, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 5.995659351348877, |
|
"learning_rate": 4.3703703703703705e-05, |
|
"loss": 0.226, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 6.6095428466796875, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.1767, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.2729267477989197, |
|
"learning_rate": 3.777777777777778e-05, |
|
"loss": 0.4345, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8968481375358166, |
|
"eval_auc": 0.9792817679558011, |
|
"eval_f1": 0.8902439024390244, |
|
"eval_loss": 0.45533695816993713, |
|
"eval_precision": 0.9931972789115646, |
|
"eval_recall": 0.8066298342541437, |
|
"eval_runtime": 76.2971, |
|
"eval_samples_per_second": 4.574, |
|
"eval_steps_per_second": 0.288, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.21169081330299377, |
|
"learning_rate": 3.481481481481482e-05, |
|
"loss": 0.3073, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 8.168057441711426, |
|
"learning_rate": 3.185185185185185e-05, |
|
"loss": 0.1746, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 1.0985065698623657, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 0.118, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.13789577782154083, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.0233, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.7061790227890015, |
|
"learning_rate": 2.2962962962962965e-05, |
|
"loss": 0.2002, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 6.507961750030518, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0259, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9484240687679083, |
|
"eval_auc": 0.9819126545645883, |
|
"eval_f1": 0.9516129032258065, |
|
"eval_loss": 0.2552463114261627, |
|
"eval_precision": 0.9267015706806283, |
|
"eval_recall": 0.9779005524861878, |
|
"eval_runtime": 75.7963, |
|
"eval_samples_per_second": 4.604, |
|
"eval_steps_per_second": 0.29, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.1205369234085083, |
|
"learning_rate": 1.7037037037037038e-05, |
|
"loss": 0.0081, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.08243238925933838, |
|
"learning_rate": 1.4074074074074075e-05, |
|
"loss": 0.0458, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 54.06669998168945, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.1301, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.08115862309932709, |
|
"learning_rate": 8.14814814814815e-06, |
|
"loss": 0.0053, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 2.7199999999999998, |
|
"grad_norm": 0.06672395765781403, |
|
"learning_rate": 5.185185185185185e-06, |
|
"loss": 0.0075, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 0.0613614059984684, |
|
"learning_rate": 2.2222222222222225e-06, |
|
"loss": 0.0871, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9656160458452722, |
|
"eval_auc": 0.9890489344909235, |
|
"eval_f1": 0.967032967032967, |
|
"eval_loss": 0.18809613585472107, |
|
"eval_precision": 0.9617486338797814, |
|
"eval_recall": 0.9723756906077348, |
|
"eval_runtime": 75.8185, |
|
"eval_samples_per_second": 4.603, |
|
"eval_steps_per_second": 0.29, |
|
"step": 150 |
|
} |
|
], |
|
"logging_steps": 8, |
|
"max_steps": 150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 77750711202816.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|