{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 1, "global_step": 40, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "grad_norm": 22.375, "learning_rate": 2.5e-05, "loss": 1.3311, "step": 1 }, { "epoch": 0.25, "eval_accuracy": 0.3548387096774194, "eval_f1": 0.2145748987854251, "eval_loss": 1.257245421409607, "eval_runtime": 1.4238, "eval_samples_per_second": 43.545, "eval_steps_per_second": 2.809, "step": 1 }, { "epoch": 0.5, "grad_norm": 23.125, "learning_rate": 5e-05, "loss": 1.3826, "step": 2 }, { "epoch": 0.5, "eval_accuracy": 0.3709677419354839, "eval_f1": 0.18930041152263377, "eval_loss": 1.2128275632858276, "eval_runtime": 1.4194, "eval_samples_per_second": 43.681, "eval_steps_per_second": 2.818, "step": 2 }, { "epoch": 0.75, "grad_norm": 21.375, "learning_rate": 4.868421052631579e-05, "loss": 1.1411, "step": 3 }, { "epoch": 0.75, "eval_accuracy": 0.43548387096774194, "eval_f1": 0.20689655172413793, "eval_loss": 1.1111390590667725, "eval_runtime": 1.3732, "eval_samples_per_second": 45.15, "eval_steps_per_second": 2.913, "step": 3 }, { "epoch": 1.0, "grad_norm": 19.125, "learning_rate": 4.736842105263158e-05, "loss": 1.0967, "step": 4 }, { "epoch": 1.0, "eval_accuracy": 0.45161290322580644, "eval_f1": 0.20740740740740746, "eval_loss": 1.0505292415618896, "eval_runtime": 1.4197, "eval_samples_per_second": 43.671, "eval_steps_per_second": 2.817, "step": 4 }, { "epoch": 1.25, "grad_norm": 16.75, "learning_rate": 4.605263157894737e-05, "loss": 1.0499, "step": 5 }, { "epoch": 1.25, "eval_accuracy": 0.46774193548387094, "eval_f1": 0.23196004993757802, "eval_loss": 1.023846983909607, "eval_runtime": 1.4198, "eval_samples_per_second": 43.669, "eval_steps_per_second": 2.817, "step": 5 }, { "epoch": 1.5, "grad_norm": 13.25, "learning_rate": 4.473684210526316e-05, "loss": 0.9663, "step": 6 }, { "epoch": 1.5, "eval_accuracy": 0.46774193548387094, "eval_f1": 0.23196004993757802, "eval_loss": 1.0119235515594482, "eval_runtime": 1.4209, "eval_samples_per_second": 43.633, "eval_steps_per_second": 2.815, "step": 6 }, { "epoch": 1.75, "grad_norm": 14.625, "learning_rate": 4.342105263157895e-05, "loss": 1.0378, "step": 7 }, { "epoch": 1.75, "eval_accuracy": 0.46774193548387094, "eval_f1": 0.21245421245421245, "eval_loss": 1.0040794610977173, "eval_runtime": 1.4206, "eval_samples_per_second": 43.643, "eval_steps_per_second": 2.816, "step": 7 }, { "epoch": 2.0, "grad_norm": 15.125, "learning_rate": 4.210526315789474e-05, "loss": 0.9995, "step": 8 }, { "epoch": 2.0, "eval_accuracy": 0.46774193548387094, "eval_f1": 0.23196004993757802, "eval_loss": 0.9963457584381104, "eval_runtime": 1.4208, "eval_samples_per_second": 43.638, "eval_steps_per_second": 2.815, "step": 8 }, { "epoch": 2.25, "grad_norm": 6.09375, "learning_rate": 4.078947368421053e-05, "loss": 0.8568, "step": 9 }, { "epoch": 2.25, "eval_accuracy": 0.46774193548387094, "eval_f1": 0.21245421245421245, "eval_loss": 0.9923135042190552, "eval_runtime": 1.4203, "eval_samples_per_second": 43.652, "eval_steps_per_second": 2.816, "step": 9 }, { "epoch": 2.5, "grad_norm": 8.6875, "learning_rate": 3.9473684210526316e-05, "loss": 0.9505, "step": 10 }, { "epoch": 2.5, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.25513196480938416, "eval_loss": 0.9838079810142517, "eval_runtime": 1.4208, "eval_samples_per_second": 43.636, "eval_steps_per_second": 2.815, "step": 10 }, { "epoch": 2.75, "grad_norm": 12.0, "learning_rate": 3.815789473684211e-05, "loss": 1.0523, "step": 11 }, { "epoch": 2.75, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.26990838618745594, "eval_loss": 0.9715851545333862, "eval_runtime": 1.4206, "eval_samples_per_second": 43.644, "eval_steps_per_second": 2.816, "step": 11 }, { "epoch": 3.0, "grad_norm": 9.25, "learning_rate": 3.6842105263157895e-05, "loss": 0.8034, "step": 12 }, { "epoch": 3.0, "eval_accuracy": 0.5, "eval_f1": 0.3108974358974359, "eval_loss": 0.9636703133583069, "eval_runtime": 1.4203, "eval_samples_per_second": 43.653, "eval_steps_per_second": 2.816, "step": 12 }, { "epoch": 3.25, "grad_norm": 7.15625, "learning_rate": 3.5526315789473684e-05, "loss": 0.8381, "step": 13 }, { "epoch": 3.25, "eval_accuracy": 0.5, "eval_f1": 0.3108974358974359, "eval_loss": 0.9614336490631104, "eval_runtime": 1.4203, "eval_samples_per_second": 43.652, "eval_steps_per_second": 2.816, "step": 13 }, { "epoch": 3.5, "grad_norm": 5.65625, "learning_rate": 3.421052631578947e-05, "loss": 0.9831, "step": 14 }, { "epoch": 3.5, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9595750570297241, "eval_runtime": 1.3701, "eval_samples_per_second": 45.253, "eval_steps_per_second": 2.92, "step": 14 }, { "epoch": 3.75, "grad_norm": 4.21875, "learning_rate": 3.289473684210527e-05, "loss": 0.7901, "step": 15 }, { "epoch": 3.75, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9619613289833069, "eval_runtime": 1.4199, "eval_samples_per_second": 43.667, "eval_steps_per_second": 2.817, "step": 15 }, { "epoch": 4.0, "grad_norm": 6.65625, "learning_rate": 3.157894736842105e-05, "loss": 0.7296, "step": 16 }, { "epoch": 4.0, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9658597111701965, "eval_runtime": 1.4204, "eval_samples_per_second": 43.65, "eval_steps_per_second": 2.816, "step": 16 }, { "epoch": 4.25, "grad_norm": 8.25, "learning_rate": 3.0263157894736844e-05, "loss": 0.7682, "step": 17 }, { "epoch": 4.25, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9644736051559448, "eval_runtime": 1.4189, "eval_samples_per_second": 43.695, "eval_steps_per_second": 2.819, "step": 17 }, { "epoch": 4.5, "grad_norm": 6.71875, "learning_rate": 2.8947368421052634e-05, "loss": 0.88, "step": 18 }, { "epoch": 4.5, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9645444750785828, "eval_runtime": 1.419, "eval_samples_per_second": 43.694, "eval_steps_per_second": 2.819, "step": 18 }, { "epoch": 4.75, "grad_norm": 3.921875, "learning_rate": 2.7631578947368426e-05, "loss": 0.8078, "step": 19 }, { "epoch": 4.75, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.961181640625, "eval_runtime": 1.4194, "eval_samples_per_second": 43.679, "eval_steps_per_second": 2.818, "step": 19 }, { "epoch": 5.0, "grad_norm": 5.09375, "learning_rate": 2.6315789473684212e-05, "loss": 0.7689, "step": 20 }, { "epoch": 5.0, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9596459269523621, "eval_runtime": 1.419, "eval_samples_per_second": 43.692, "eval_steps_per_second": 2.819, "step": 20 }, { "epoch": 5.25, "grad_norm": 10.0, "learning_rate": 2.5e-05, "loss": 1.0543, "step": 21 }, { "epoch": 5.25, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9581180810928345, "eval_runtime": 1.4191, "eval_samples_per_second": 43.691, "eval_steps_per_second": 2.819, "step": 21 }, { "epoch": 5.5, "grad_norm": 3.515625, "learning_rate": 2.368421052631579e-05, "loss": 0.7845, "step": 22 }, { "epoch": 5.5, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9573659300804138, "eval_runtime": 1.4193, "eval_samples_per_second": 43.684, "eval_steps_per_second": 2.818, "step": 22 }, { "epoch": 5.75, "grad_norm": 2.765625, "learning_rate": 2.236842105263158e-05, "loss": 0.7907, "step": 23 }, { "epoch": 5.75, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9584291577339172, "eval_runtime": 1.4194, "eval_samples_per_second": 43.681, "eval_steps_per_second": 2.818, "step": 23 }, { "epoch": 6.0, "grad_norm": 3.15625, "learning_rate": 2.105263157894737e-05, "loss": 0.7345, "step": 24 }, { "epoch": 6.0, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9615005850791931, "eval_runtime": 1.4194, "eval_samples_per_second": 43.679, "eval_steps_per_second": 2.818, "step": 24 }, { "epoch": 6.25, "grad_norm": 4.25, "learning_rate": 1.9736842105263158e-05, "loss": 0.7753, "step": 25 }, { "epoch": 6.25, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.965796709060669, "eval_runtime": 1.4191, "eval_samples_per_second": 43.689, "eval_steps_per_second": 2.819, "step": 25 }, { "epoch": 6.5, "grad_norm": 4.3125, "learning_rate": 1.8421052631578947e-05, "loss": 0.7508, "step": 26 }, { "epoch": 6.5, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9699903726577759, "eval_runtime": 1.4192, "eval_samples_per_second": 43.688, "eval_steps_per_second": 2.819, "step": 26 }, { "epoch": 6.75, "grad_norm": 3.359375, "learning_rate": 1.7105263157894737e-05, "loss": 0.7477, "step": 27 }, { "epoch": 6.75, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9734280705451965, "eval_runtime": 1.4193, "eval_samples_per_second": 43.684, "eval_steps_per_second": 2.818, "step": 27 }, { "epoch": 7.0, "grad_norm": 4.71875, "learning_rate": 1.5789473684210526e-05, "loss": 0.9474, "step": 28 }, { "epoch": 7.0, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9768341779708862, "eval_runtime": 1.4192, "eval_samples_per_second": 43.687, "eval_steps_per_second": 2.819, "step": 28 }, { "epoch": 7.25, "grad_norm": 5.125, "learning_rate": 1.4473684210526317e-05, "loss": 0.9033, "step": 29 }, { "epoch": 7.25, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9821304678916931, "eval_runtime": 1.4191, "eval_samples_per_second": 43.691, "eval_steps_per_second": 2.819, "step": 29 }, { "epoch": 7.5, "grad_norm": 3.859375, "learning_rate": 1.3157894736842106e-05, "loss": 0.7329, "step": 30 }, { "epoch": 7.5, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9849578142166138, "eval_runtime": 1.4195, "eval_samples_per_second": 43.677, "eval_steps_per_second": 2.818, "step": 30 }, { "epoch": 7.75, "grad_norm": 3.625, "learning_rate": 1.1842105263157895e-05, "loss": 0.7054, "step": 31 }, { "epoch": 7.75, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9871038794517517, "eval_runtime": 1.4195, "eval_samples_per_second": 43.677, "eval_steps_per_second": 2.818, "step": 31 }, { "epoch": 8.0, "grad_norm": 9.6875, "learning_rate": 1.0526315789473684e-05, "loss": 0.9037, "step": 32 }, { "epoch": 8.0, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.986328125, "eval_runtime": 1.3698, "eval_samples_per_second": 45.261, "eval_steps_per_second": 2.92, "step": 32 }, { "epoch": 8.25, "grad_norm": 4.0, "learning_rate": 9.210526315789474e-06, "loss": 0.8046, "step": 33 }, { "epoch": 8.25, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9895294904708862, "eval_runtime": 1.421, "eval_samples_per_second": 43.632, "eval_steps_per_second": 2.815, "step": 33 }, { "epoch": 8.5, "grad_norm": 2.359375, "learning_rate": 7.894736842105263e-06, "loss": 0.7161, "step": 34 }, { "epoch": 8.5, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9918173551559448, "eval_runtime": 1.3694, "eval_samples_per_second": 45.274, "eval_steps_per_second": 2.921, "step": 34 }, { "epoch": 8.75, "grad_norm": 7.8125, "learning_rate": 6.578947368421053e-06, "loss": 0.8239, "step": 35 }, { "epoch": 8.75, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9916952848434448, "eval_runtime": 1.4202, "eval_samples_per_second": 43.657, "eval_steps_per_second": 2.817, "step": 35 }, { "epoch": 9.0, "grad_norm": 4.375, "learning_rate": 5.263157894736842e-06, "loss": 0.8111, "step": 36 }, { "epoch": 9.0, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9914905428886414, "eval_runtime": 1.4189, "eval_samples_per_second": 43.695, "eval_steps_per_second": 2.819, "step": 36 }, { "epoch": 9.25, "grad_norm": 3.78125, "learning_rate": 3.9473684210526315e-06, "loss": 0.9011, "step": 37 }, { "epoch": 9.25, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9921599626541138, "eval_runtime": 1.4188, "eval_samples_per_second": 43.7, "eval_steps_per_second": 2.819, "step": 37 }, { "epoch": 9.5, "grad_norm": 2.90625, "learning_rate": 2.631578947368421e-06, "loss": 0.8858, "step": 38 }, { "epoch": 9.5, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9920575618743896, "eval_runtime": 1.4203, "eval_samples_per_second": 43.653, "eval_steps_per_second": 2.816, "step": 38 }, { "epoch": 9.75, "grad_norm": 3.765625, "learning_rate": 1.3157894736842106e-06, "loss": 0.822, "step": 39 }, { "epoch": 9.75, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9910297989845276, "eval_runtime": 1.4201, "eval_samples_per_second": 43.658, "eval_steps_per_second": 2.817, "step": 39 }, { "epoch": 10.0, "grad_norm": 3.203125, "learning_rate": 0.0, "loss": 0.7812, "step": 40 }, { "epoch": 10.0, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.293480615118042, "eval_loss": 0.9894664883613586, "eval_runtime": 1.4198, "eval_samples_per_second": 43.668, "eval_steps_per_second": 2.817, "step": 40 }, { "epoch": 10.0, "step": 40, "total_flos": 1.4174270733156352e+16, "train_loss": 0.8902546644210816, "train_runtime": 176.2791, "train_samples_per_second": 13.842, "train_steps_per_second": 0.227 } ], "logging_steps": 1, "max_steps": 40, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.4174270733156352e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }