{ "best_metric": 0.9934865429519479, "best_model_checkpoint": "omarmomen/sf_babylm_1/finetune/main_verb_control/checkpoint-600", "epoch": 10.0, "global_step": 1280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.56, "eval_accuracy": 0.9893577694892883, "eval_f1": 0.9893617021276596, "eval_loss": 0.05015372857451439, "eval_mcc": 0.9787174883729357, "eval_runtime": 27.4509, "eval_samples_per_second": 595.61, "eval_steps_per_second": 74.46, "step": 200 }, { "epoch": 3.12, "eval_accuracy": 0.9921100735664368, "eval_f1": 0.9920678841542151, "eval_loss": 0.04251589626073837, "eval_mcc": 0.9842637485530025, "eval_runtime": 27.5753, "eval_samples_per_second": 592.922, "eval_steps_per_second": 74.124, "step": 400 }, { "epoch": 3.91, "learning_rate": 3.0468750000000002e-05, "loss": 0.0781, "step": 500 }, { "epoch": 4.69, "eval_accuracy": 0.9935168027877808, "eval_f1": 0.9934865429519479, "eval_loss": 0.042066145688295364, "eval_mcc": 0.9870657394565869, "eval_runtime": 27.392, "eval_samples_per_second": 596.89, "eval_steps_per_second": 74.62, "step": 600 }, { "epoch": 6.25, "eval_accuracy": 0.9931498765945435, "eval_f1": 0.9931170108161259, "eval_loss": 0.04584728553891182, "eval_mcc": 0.9863337424620885, "eval_runtime": 27.5365, "eval_samples_per_second": 593.757, "eval_steps_per_second": 74.229, "step": 800 }, { "epoch": 7.81, "learning_rate": 1.09375e-05, "loss": 0.0003, "step": 1000 }, { "epoch": 7.81, "eval_accuracy": 0.9933333396911621, "eval_f1": 0.9933108315434183, "eval_loss": 0.04603412747383118, "eval_mcc": 0.986681565476627, "eval_runtime": 27.5008, "eval_samples_per_second": 594.529, "eval_steps_per_second": 74.325, "step": 1000 }, { "epoch": 9.38, "eval_accuracy": 0.9925382137298584, "eval_f1": 0.9925006147037128, "eval_loss": 0.04715948551893234, "eval_mcc": 0.9851145750966622, "eval_runtime": 27.359, "eval_samples_per_second": 597.609, "eval_steps_per_second": 74.71, "step": 1200 }, { "epoch": 10.0, "step": 1280, "total_flos": 2092658450872320.0, "train_loss": 0.03072957310359925, "train_runtime": 433.9699, "train_samples_per_second": 188.17, "train_steps_per_second": 2.95 } ], "max_steps": 1280, "num_train_epochs": 10, "total_flos": 2092658450872320.0, "trial_name": null, "trial_params": null }