{ "best_metric": 0.1797216236591339, "best_model_checkpoint": "autotrain-vivi3n/checkpoint-1500", "epoch": 3.0, "eval_steps": 500, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 20.401409149169922, "learning_rate": 8.333333333333334e-06, "loss": 1.7642, "step": 25 }, { "epoch": 0.1, "grad_norm": 12.83076286315918, "learning_rate": 1.6666666666666667e-05, "loss": 1.133, "step": 50 }, { "epoch": 0.15, "grad_norm": 3.4568631649017334, "learning_rate": 2.5e-05, "loss": 0.7359, "step": 75 }, { "epoch": 0.2, "grad_norm": 3.5798301696777344, "learning_rate": 3.3333333333333335e-05, "loss": 0.7359, "step": 100 }, { "epoch": 0.25, "grad_norm": 4.27333402633667, "learning_rate": 4.166666666666667e-05, "loss": 0.6872, "step": 125 }, { "epoch": 0.3, "grad_norm": 5.849865436553955, "learning_rate": 5e-05, "loss": 0.6644, "step": 150 }, { "epoch": 0.35, "grad_norm": 3.0048937797546387, "learning_rate": 4.9074074074074075e-05, "loss": 0.667, "step": 175 }, { "epoch": 0.4, "grad_norm": 6.366631031036377, "learning_rate": 4.814814814814815e-05, "loss": 0.6231, "step": 200 }, { "epoch": 0.45, "grad_norm": 4.609536170959473, "learning_rate": 4.722222222222222e-05, "loss": 0.6099, "step": 225 }, { "epoch": 0.5, "grad_norm": 4.420734882354736, "learning_rate": 4.62962962962963e-05, "loss": 0.563, "step": 250 }, { "epoch": 0.55, "grad_norm": 2.8860912322998047, "learning_rate": 4.5370370370370374e-05, "loss": 0.6232, "step": 275 }, { "epoch": 0.6, "grad_norm": 4.6798577308654785, "learning_rate": 4.4444444444444447e-05, "loss": 0.5028, "step": 300 }, { "epoch": 0.65, "grad_norm": 6.540982246398926, "learning_rate": 4.351851851851852e-05, "loss": 0.529, "step": 325 }, { "epoch": 0.7, "grad_norm": 3.8343939781188965, "learning_rate": 4.259259259259259e-05, "loss": 0.5219, "step": 350 }, { "epoch": 0.75, "grad_norm": 2.1247823238372803, "learning_rate": 4.166666666666667e-05, "loss": 0.4827, "step": 375 }, { "epoch": 0.8, "grad_norm": 7.609323501586914, "learning_rate": 4.074074074074074e-05, "loss": 0.4975, "step": 400 }, { "epoch": 0.85, "grad_norm": 5.313252925872803, "learning_rate": 3.981481481481482e-05, "loss": 0.5284, "step": 425 }, { "epoch": 0.9, "grad_norm": 7.599594593048096, "learning_rate": 3.888888888888889e-05, "loss": 0.4581, "step": 450 }, { "epoch": 0.95, "grad_norm": 3.263301134109497, "learning_rate": 3.7962962962962964e-05, "loss": 0.5126, "step": 475 }, { "epoch": 1.0, "grad_norm": 5.882161617279053, "learning_rate": 3.7037037037037037e-05, "loss": 0.397, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.852, "eval_auc": 0.943408, "eval_f1": 0.8366445916114791, "eval_loss": 0.32579681277275085, "eval_precision": 0.9334975369458128, "eval_recall": 0.758, "eval_runtime": 284.8907, "eval_samples_per_second": 3.51, "eval_steps_per_second": 0.221, "step": 500 }, { "epoch": 1.05, "grad_norm": 4.248765468597412, "learning_rate": 3.611111111111111e-05, "loss": 0.3009, "step": 525 }, { "epoch": 1.1, "grad_norm": 16.29542350769043, "learning_rate": 3.518518518518519e-05, "loss": 0.3521, "step": 550 }, { "epoch": 1.15, "grad_norm": 4.925727367401123, "learning_rate": 3.425925925925926e-05, "loss": 0.3923, "step": 575 }, { "epoch": 1.2, "grad_norm": 9.481324195861816, "learning_rate": 3.3333333333333335e-05, "loss": 0.3771, "step": 600 }, { "epoch": 1.25, "grad_norm": 5.314146041870117, "learning_rate": 3.240740740740741e-05, "loss": 0.3731, "step": 625 }, { "epoch": 1.3, "grad_norm": 5.7874531745910645, "learning_rate": 3.148148148148148e-05, "loss": 0.276, "step": 650 }, { "epoch": 1.35, "grad_norm": 11.975399017333984, "learning_rate": 3.055555555555556e-05, "loss": 0.4218, "step": 675 }, { "epoch": 1.4, "grad_norm": 3.5423085689544678, "learning_rate": 2.962962962962963e-05, "loss": 0.2853, "step": 700 }, { "epoch": 1.45, "grad_norm": 2.484663486480713, "learning_rate": 2.8703703703703706e-05, "loss": 0.2941, "step": 725 }, { "epoch": 1.5, "grad_norm": 8.482043266296387, "learning_rate": 2.777777777777778e-05, "loss": 0.2908, "step": 750 }, { "epoch": 1.55, "grad_norm": 11.303189277648926, "learning_rate": 2.6851851851851855e-05, "loss": 0.2806, "step": 775 }, { "epoch": 1.6, "grad_norm": 7.4056291580200195, "learning_rate": 2.5925925925925925e-05, "loss": 0.2928, "step": 800 }, { "epoch": 1.65, "grad_norm": 3.922528028488159, "learning_rate": 2.5e-05, "loss": 0.3142, "step": 825 }, { "epoch": 1.7, "grad_norm": 9.461620330810547, "learning_rate": 2.4074074074074074e-05, "loss": 0.5345, "step": 850 }, { "epoch": 1.75, "grad_norm": 3.0611841678619385, "learning_rate": 2.314814814814815e-05, "loss": 0.273, "step": 875 }, { "epoch": 1.8, "grad_norm": 6.015491962432861, "learning_rate": 2.2222222222222223e-05, "loss": 0.3177, "step": 900 }, { "epoch": 1.85, "grad_norm": 7.551340103149414, "learning_rate": 2.1296296296296296e-05, "loss": 0.2656, "step": 925 }, { "epoch": 1.9, "grad_norm": 13.884527206420898, "learning_rate": 2.037037037037037e-05, "loss": 0.3409, "step": 950 }, { "epoch": 1.95, "grad_norm": 0.6236781477928162, "learning_rate": 1.9444444444444445e-05, "loss": 0.222, "step": 975 }, { "epoch": 2.0, "grad_norm": 1.7288349866867065, "learning_rate": 1.8518518518518518e-05, "loss": 0.2162, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.913, "eval_auc": 0.978072, "eval_f1": 0.9144542772861357, "eval_loss": 0.22896826267242432, "eval_precision": 0.8994197292069632, "eval_recall": 0.93, "eval_runtime": 285.1891, "eval_samples_per_second": 3.506, "eval_steps_per_second": 0.221, "step": 1000 }, { "epoch": 2.05, "grad_norm": 24.80970001220703, "learning_rate": 1.7592592592592595e-05, "loss": 0.2816, "step": 1025 }, { "epoch": 2.1, "grad_norm": 3.5682404041290283, "learning_rate": 1.6666666666666667e-05, "loss": 0.1733, "step": 1050 }, { "epoch": 2.15, "grad_norm": 11.192222595214844, "learning_rate": 1.574074074074074e-05, "loss": 0.2307, "step": 1075 }, { "epoch": 2.2, "grad_norm": 3.6742281913757324, "learning_rate": 1.4814814814814815e-05, "loss": 0.1328, "step": 1100 }, { "epoch": 2.25, "grad_norm": 12.249692916870117, "learning_rate": 1.388888888888889e-05, "loss": 0.2833, "step": 1125 }, { "epoch": 2.3, "grad_norm": 9.88560676574707, "learning_rate": 1.2962962962962962e-05, "loss": 0.2459, "step": 1150 }, { "epoch": 2.35, "grad_norm": 2.8054721355438232, "learning_rate": 1.2037037037037037e-05, "loss": 0.2079, "step": 1175 }, { "epoch": 2.4, "grad_norm": 10.091593742370605, "learning_rate": 1.1111111111111112e-05, "loss": 0.2933, "step": 1200 }, { "epoch": 2.45, "grad_norm": 0.920978844165802, "learning_rate": 1.0185185185185185e-05, "loss": 0.1921, "step": 1225 }, { "epoch": 2.5, "grad_norm": 0.0899479016661644, "learning_rate": 9.259259259259259e-06, "loss": 0.1607, "step": 1250 }, { "epoch": 2.55, "grad_norm": 15.381816864013672, "learning_rate": 8.333333333333334e-06, "loss": 0.2516, "step": 1275 }, { "epoch": 2.6, "grad_norm": 0.061589837074279785, "learning_rate": 7.4074074074074075e-06, "loss": 0.2309, "step": 1300 }, { "epoch": 2.65, "grad_norm": 7.775277614593506, "learning_rate": 6.481481481481481e-06, "loss": 0.312, "step": 1325 }, { "epoch": 2.7, "grad_norm": 5.62130880355835, "learning_rate": 5.555555555555556e-06, "loss": 0.2236, "step": 1350 }, { "epoch": 2.75, "grad_norm": 15.061668395996094, "learning_rate": 4.6296296296296296e-06, "loss": 0.2173, "step": 1375 }, { "epoch": 2.8, "grad_norm": 4.630084991455078, "learning_rate": 3.7037037037037037e-06, "loss": 0.2465, "step": 1400 }, { "epoch": 2.85, "grad_norm": 6.20747184753418, "learning_rate": 2.777777777777778e-06, "loss": 0.1715, "step": 1425 }, { "epoch": 2.9, "grad_norm": 1.773815393447876, "learning_rate": 1.8518518518518519e-06, "loss": 0.2216, "step": 1450 }, { "epoch": 2.95, "grad_norm": 5.9370436668396, "learning_rate": 9.259259259259259e-07, "loss": 0.1786, "step": 1475 }, { "epoch": 3.0, "grad_norm": 4.292758464813232, "learning_rate": 0.0, "loss": 0.1342, "step": 1500 }, { "epoch": 3.0, "eval_accuracy": 0.935, "eval_auc": 0.986464, "eval_f1": 0.9353233830845771, "eval_loss": 0.1797216236591339, "eval_precision": 0.9306930693069307, "eval_recall": 0.94, "eval_runtime": 285.872, "eval_samples_per_second": 3.498, "eval_steps_per_second": 0.22, "step": 1500 } ], "logging_steps": 25, "max_steps": 1500, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.29903875375104e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }