{ "best_metric": 0.6170168519020081, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.05037783375314862, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0020151133501259445, "grad_norm": 0.3594292998313904, "learning_rate": 2e-05, "loss": 2.286, "step": 1 }, { "epoch": 0.0020151133501259445, "eval_loss": 0.6638228893280029, "eval_runtime": 55.2182, "eval_samples_per_second": 3.785, "eval_steps_per_second": 1.902, "step": 1 }, { "epoch": 0.004030226700251889, "grad_norm": 0.39827266335487366, "learning_rate": 4e-05, "loss": 2.6299, "step": 2 }, { "epoch": 0.006045340050377834, "grad_norm": 0.25862371921539307, "learning_rate": 6e-05, "loss": 1.2722, "step": 3 }, { "epoch": 0.008060453400503778, "grad_norm": 0.5083067417144775, "learning_rate": 8e-05, "loss": 3.3879, "step": 4 }, { "epoch": 0.010075566750629723, "grad_norm": 0.40554600954055786, "learning_rate": 0.0001, "loss": 2.5694, "step": 5 }, { "epoch": 0.010075566750629723, "eval_loss": 0.6636737585067749, "eval_runtime": 55.176, "eval_samples_per_second": 3.788, "eval_steps_per_second": 1.903, "step": 5 }, { "epoch": 0.012090680100755667, "grad_norm": 0.37122878432273865, "learning_rate": 0.00012, "loss": 2.9988, "step": 6 }, { "epoch": 0.014105793450881612, "grad_norm": 0.33736562728881836, "learning_rate": 0.00014, "loss": 2.5039, "step": 7 }, { "epoch": 0.016120906801007556, "grad_norm": 0.5041938424110413, "learning_rate": 0.00016, "loss": 2.7734, "step": 8 }, { "epoch": 0.0181360201511335, "grad_norm": 0.46313270926475525, "learning_rate": 0.00018, "loss": 2.3923, "step": 9 }, { "epoch": 0.020151133501259445, "grad_norm": 0.43415185809135437, "learning_rate": 0.0002, "loss": 2.2073, "step": 10 }, { "epoch": 0.020151133501259445, "eval_loss": 0.6502737402915955, "eval_runtime": 55.0477, "eval_samples_per_second": 3.797, "eval_steps_per_second": 1.907, "step": 10 }, { "epoch": 0.02216624685138539, "grad_norm": 0.7167869210243225, "learning_rate": 0.00019781476007338058, "loss": 2.6288, "step": 11 }, { "epoch": 0.024181360201511334, "grad_norm": 0.48966965079307556, "learning_rate": 0.0001913545457642601, "loss": 2.0753, "step": 12 }, { "epoch": 0.02619647355163728, "grad_norm": 0.4106284976005554, "learning_rate": 0.00018090169943749476, "loss": 2.1925, "step": 13 }, { "epoch": 0.028211586901763223, "grad_norm": 0.46533626317977905, "learning_rate": 0.00016691306063588583, "loss": 2.0292, "step": 14 }, { "epoch": 0.030226700251889168, "grad_norm": 1.0065481662750244, "learning_rate": 0.00015000000000000001, "loss": 2.4329, "step": 15 }, { "epoch": 0.030226700251889168, "eval_loss": 0.629341721534729, "eval_runtime": 55.0242, "eval_samples_per_second": 3.798, "eval_steps_per_second": 1.908, "step": 15 }, { "epoch": 0.03224181360201511, "grad_norm": 0.5595883131027222, "learning_rate": 0.00013090169943749476, "loss": 2.9854, "step": 16 }, { "epoch": 0.03425692695214106, "grad_norm": 0.4277346432209015, "learning_rate": 0.00011045284632676536, "loss": 1.743, "step": 17 }, { "epoch": 0.036272040302267, "grad_norm": 0.506736159324646, "learning_rate": 8.954715367323468e-05, "loss": 2.2698, "step": 18 }, { "epoch": 0.03828715365239295, "grad_norm": 0.5150989890098572, "learning_rate": 6.909830056250527e-05, "loss": 2.8678, "step": 19 }, { "epoch": 0.04030226700251889, "grad_norm": 0.3571804165840149, "learning_rate": 5.000000000000002e-05, "loss": 1.1466, "step": 20 }, { "epoch": 0.04030226700251889, "eval_loss": 0.6193917989730835, "eval_runtime": 55.0702, "eval_samples_per_second": 3.795, "eval_steps_per_second": 1.907, "step": 20 }, { "epoch": 0.04231738035264484, "grad_norm": 0.5522847771644592, "learning_rate": 3.308693936411421e-05, "loss": 3.0133, "step": 21 }, { "epoch": 0.04433249370277078, "grad_norm": 0.385016530752182, "learning_rate": 1.9098300562505266e-05, "loss": 1.3122, "step": 22 }, { "epoch": 0.04634760705289673, "grad_norm": 0.4866712689399719, "learning_rate": 8.645454235739903e-06, "loss": 2.1991, "step": 23 }, { "epoch": 0.04836272040302267, "grad_norm": 0.6545142531394958, "learning_rate": 2.1852399266194314e-06, "loss": 1.0803, "step": 24 }, { "epoch": 0.05037783375314862, "grad_norm": 0.6084586381912231, "learning_rate": 0.0, "loss": 2.8759, "step": 25 }, { "epoch": 0.05037783375314862, "eval_loss": 0.6170168519020081, "eval_runtime": 55.1544, "eval_samples_per_second": 3.789, "eval_steps_per_second": 1.904, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.698683682816e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }