{ "best_metric": 1.7055974006652832, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.01483019427554501, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005932077710218003, "grad_norm": 1.5604506731033325, "learning_rate": 2e-05, "loss": 7.2816, "step": 1 }, { "epoch": 0.0005932077710218003, "eval_loss": 1.8809071779251099, "eval_runtime": 51.1364, "eval_samples_per_second": 13.884, "eval_steps_per_second": 6.942, "step": 1 }, { "epoch": 0.0011864155420436007, "grad_norm": 1.3534035682678223, "learning_rate": 4e-05, "loss": 7.2367, "step": 2 }, { "epoch": 0.0017796233130654012, "grad_norm": 1.556756615638733, "learning_rate": 6e-05, "loss": 6.6101, "step": 3 }, { "epoch": 0.0023728310840872013, "grad_norm": 1.78481125831604, "learning_rate": 8e-05, "loss": 6.6512, "step": 4 }, { "epoch": 0.002966038855109002, "grad_norm": 1.6084747314453125, "learning_rate": 0.0001, "loss": 7.5114, "step": 5 }, { "epoch": 0.002966038855109002, "eval_loss": 1.8634954690933228, "eval_runtime": 47.8443, "eval_samples_per_second": 14.84, "eval_steps_per_second": 7.42, "step": 5 }, { "epoch": 0.0035592466261308024, "grad_norm": 1.8598929643630981, "learning_rate": 0.00012, "loss": 7.3005, "step": 6 }, { "epoch": 0.004152454397152602, "grad_norm": 1.7229331731796265, "learning_rate": 0.00014, "loss": 7.0142, "step": 7 }, { "epoch": 0.004745662168174403, "grad_norm": 1.9855549335479736, "learning_rate": 0.00016, "loss": 5.9546, "step": 8 }, { "epoch": 0.005338869939196204, "grad_norm": 2.6306424140930176, "learning_rate": 0.00018, "loss": 7.6557, "step": 9 }, { "epoch": 0.005932077710218004, "grad_norm": 4.312272071838379, "learning_rate": 0.0002, "loss": 8.7644, "step": 10 }, { "epoch": 0.005932077710218004, "eval_loss": 1.7866129875183105, "eval_runtime": 47.7519, "eval_samples_per_second": 14.869, "eval_steps_per_second": 7.434, "step": 10 }, { "epoch": 0.0065252854812398045, "grad_norm": 3.453673839569092, "learning_rate": 0.00019781476007338058, "loss": 8.4542, "step": 11 }, { "epoch": 0.007118493252261605, "grad_norm": 4.104527473449707, "learning_rate": 0.0001913545457642601, "loss": 8.2059, "step": 12 }, { "epoch": 0.007711701023283405, "grad_norm": 6.156380653381348, "learning_rate": 0.00018090169943749476, "loss": 8.3057, "step": 13 }, { "epoch": 0.008304908794305205, "grad_norm": 5.152556419372559, "learning_rate": 0.00016691306063588583, "loss": 5.8191, "step": 14 }, { "epoch": 0.008898116565327005, "grad_norm": 4.139110088348389, "learning_rate": 0.00015000000000000001, "loss": 6.4268, "step": 15 }, { "epoch": 0.008898116565327005, "eval_loss": 1.7479956150054932, "eval_runtime": 47.3405, "eval_samples_per_second": 14.998, "eval_steps_per_second": 7.499, "step": 15 }, { "epoch": 0.009491324336348805, "grad_norm": 5.199703216552734, "learning_rate": 0.00013090169943749476, "loss": 7.6062, "step": 16 }, { "epoch": 0.010084532107370607, "grad_norm": 5.312143325805664, "learning_rate": 0.00011045284632676536, "loss": 5.2735, "step": 17 }, { "epoch": 0.010677739878392408, "grad_norm": 5.719979286193848, "learning_rate": 8.954715367323468e-05, "loss": 6.2765, "step": 18 }, { "epoch": 0.011270947649414208, "grad_norm": 4.406425476074219, "learning_rate": 6.909830056250527e-05, "loss": 6.6895, "step": 19 }, { "epoch": 0.011864155420436008, "grad_norm": 6.59907865524292, "learning_rate": 5.000000000000002e-05, "loss": 7.4128, "step": 20 }, { "epoch": 0.011864155420436008, "eval_loss": 1.7100764513015747, "eval_runtime": 48.7502, "eval_samples_per_second": 14.564, "eval_steps_per_second": 7.282, "step": 20 }, { "epoch": 0.012457363191457809, "grad_norm": 4.8315749168396, "learning_rate": 3.308693936411421e-05, "loss": 6.5809, "step": 21 }, { "epoch": 0.013050570962479609, "grad_norm": 6.237402439117432, "learning_rate": 1.9098300562505266e-05, "loss": 7.0893, "step": 22 }, { "epoch": 0.01364377873350141, "grad_norm": 3.9503602981567383, "learning_rate": 8.645454235739903e-06, "loss": 6.1072, "step": 23 }, { "epoch": 0.01423698650452321, "grad_norm": 4.370555400848389, "learning_rate": 2.1852399266194314e-06, "loss": 7.268, "step": 24 }, { "epoch": 0.01483019427554501, "grad_norm": 8.193671226501465, "learning_rate": 0.0, "loss": 7.2895, "step": 25 }, { "epoch": 0.01483019427554501, "eval_loss": 1.7055974006652832, "eval_runtime": 48.6257, "eval_samples_per_second": 14.601, "eval_steps_per_second": 7.301, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 828538324254720.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }