{ "best_metric": 2.580864906311035, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.05, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001, "grad_norm": 0.2811749279499054, "learning_rate": 2e-05, "loss": 2.6895, "step": 1 }, { "epoch": 0.001, "eval_loss": 3.0125324726104736, "eval_runtime": 4.6936, "eval_samples_per_second": 4.474, "eval_steps_per_second": 4.474, "step": 1 }, { "epoch": 0.002, "grad_norm": 0.2987586557865143, "learning_rate": 4e-05, "loss": 2.8912, "step": 2 }, { "epoch": 0.003, "grad_norm": 0.452608197927475, "learning_rate": 6e-05, "loss": 3.4357, "step": 3 }, { "epoch": 0.004, "grad_norm": 0.38785919547080994, "learning_rate": 8e-05, "loss": 2.5889, "step": 4 }, { "epoch": 0.005, "grad_norm": 0.38931822776794434, "learning_rate": 0.0001, "loss": 2.7513, "step": 5 }, { "epoch": 0.006, "grad_norm": 0.516417384147644, "learning_rate": 0.00012, "loss": 3.2128, "step": 6 }, { "epoch": 0.007, "grad_norm": 0.4206741750240326, "learning_rate": 0.00014, "loss": 2.9368, "step": 7 }, { "epoch": 0.008, "grad_norm": 0.48171964287757874, "learning_rate": 0.00016, "loss": 2.8618, "step": 8 }, { "epoch": 0.009, "grad_norm": 0.8544142842292786, "learning_rate": 0.00018, "loss": 3.0312, "step": 9 }, { "epoch": 0.01, "grad_norm": 0.848558247089386, "learning_rate": 0.0002, "loss": 2.9334, "step": 10 }, { "epoch": 0.011, "grad_norm": 0.8914313316345215, "learning_rate": 0.00019999996900269505, "loss": 2.7981, "step": 11 }, { "epoch": 0.012, "grad_norm": 0.6103464365005493, "learning_rate": 0.0001999998760107994, "loss": 2.7247, "step": 12 }, { "epoch": 0.013, "grad_norm": 0.7618600726127625, "learning_rate": 0.00019999972102437074, "loss": 2.472, "step": 13 }, { "epoch": 0.014, "grad_norm": 0.6825264692306519, "learning_rate": 0.00019999950404350512, "loss": 2.6008, "step": 14 }, { "epoch": 0.015, "grad_norm": 0.5940832495689392, "learning_rate": 0.00019999922506833704, "loss": 2.1996, "step": 15 }, { "epoch": 0.016, "grad_norm": 0.6273623108863831, "learning_rate": 0.00019999888409903948, "loss": 2.3565, "step": 16 }, { "epoch": 0.017, "grad_norm": 0.7437952160835266, "learning_rate": 0.00019999848113582384, "loss": 2.7232, "step": 17 }, { "epoch": 0.018, "grad_norm": 0.5971533060073853, "learning_rate": 0.0001999980161789399, "loss": 2.509, "step": 18 }, { "epoch": 0.019, "grad_norm": 0.5190719962120056, "learning_rate": 0.00019999748922867592, "loss": 2.3535, "step": 19 }, { "epoch": 0.02, "grad_norm": 0.9244285821914673, "learning_rate": 0.00019999690028535855, "loss": 2.7599, "step": 20 }, { "epoch": 0.021, "grad_norm": 0.8340674638748169, "learning_rate": 0.00019999624934935296, "loss": 3.0057, "step": 21 }, { "epoch": 0.022, "grad_norm": 1.0633089542388916, "learning_rate": 0.00019999553642106266, "loss": 2.2808, "step": 22 }, { "epoch": 0.023, "grad_norm": 4.8767266273498535, "learning_rate": 0.00019999476150092967, "loss": 2.8268, "step": 23 }, { "epoch": 0.024, "grad_norm": 2.7197344303131104, "learning_rate": 0.00019999392458943432, "loss": 2.6517, "step": 24 }, { "epoch": 0.025, "grad_norm": 0.9329593777656555, "learning_rate": 0.00019999302568709547, "loss": 2.212, "step": 25 }, { "epoch": 0.026, "grad_norm": 0.6679103374481201, "learning_rate": 0.00019999206479447045, "loss": 2.0117, "step": 26 }, { "epoch": 0.027, "grad_norm": 0.5428286790847778, "learning_rate": 0.00019999104191215493, "loss": 2.7582, "step": 27 }, { "epoch": 0.028, "grad_norm": 0.5552177429199219, "learning_rate": 0.00019998995704078305, "loss": 2.54, "step": 28 }, { "epoch": 0.029, "grad_norm": 0.5453671216964722, "learning_rate": 0.00019998881018102737, "loss": 2.5358, "step": 29 }, { "epoch": 0.03, "grad_norm": 0.47653189301490784, "learning_rate": 0.00019998760133359885, "loss": 2.2443, "step": 30 }, { "epoch": 0.031, "grad_norm": 0.755976140499115, "learning_rate": 0.0001999863304992469, "loss": 2.5519, "step": 31 }, { "epoch": 0.032, "grad_norm": 0.7680912017822266, "learning_rate": 0.00019998499767875943, "loss": 2.7503, "step": 32 }, { "epoch": 0.033, "grad_norm": 3.768080472946167, "learning_rate": 0.0001999836028729627, "loss": 2.6051, "step": 33 }, { "epoch": 0.034, "grad_norm": 0.5304062962532043, "learning_rate": 0.00019998214608272136, "loss": 2.2065, "step": 34 }, { "epoch": 0.035, "grad_norm": 1.1568998098373413, "learning_rate": 0.00019998062730893862, "loss": 2.444, "step": 35 }, { "epoch": 0.036, "grad_norm": 0.8356309533119202, "learning_rate": 0.000199979046552556, "loss": 2.5763, "step": 36 }, { "epoch": 0.037, "grad_norm": 0.5210471749305725, "learning_rate": 0.00019997740381455346, "loss": 2.8545, "step": 37 }, { "epoch": 0.038, "grad_norm": 1.550714373588562, "learning_rate": 0.00019997569909594947, "loss": 2.6236, "step": 38 }, { "epoch": 0.039, "grad_norm": 0.6044741868972778, "learning_rate": 0.0001999739323978008, "loss": 2.5349, "step": 39 }, { "epoch": 0.04, "grad_norm": 0.9703565239906311, "learning_rate": 0.00019997210372120274, "loss": 3.1004, "step": 40 }, { "epoch": 0.041, "grad_norm": 0.7796650528907776, "learning_rate": 0.000199970213067289, "loss": 2.5757, "step": 41 }, { "epoch": 0.042, "grad_norm": 0.6824871301651001, "learning_rate": 0.00019996826043723162, "loss": 2.6766, "step": 42 }, { "epoch": 0.043, "grad_norm": 0.8048773407936096, "learning_rate": 0.00019996624583224114, "loss": 2.3065, "step": 43 }, { "epoch": 0.044, "grad_norm": 0.5458154082298279, "learning_rate": 0.00019996416925356652, "loss": 2.4336, "step": 44 }, { "epoch": 0.045, "grad_norm": 0.623190701007843, "learning_rate": 0.00019996203070249516, "loss": 2.3835, "step": 45 }, { "epoch": 0.046, "grad_norm": 0.5928781032562256, "learning_rate": 0.00019995983018035278, "loss": 2.3408, "step": 46 }, { "epoch": 0.047, "grad_norm": 0.5790976881980896, "learning_rate": 0.00019995756768850364, "loss": 2.3878, "step": 47 }, { "epoch": 0.048, "grad_norm": 0.5648425817489624, "learning_rate": 0.00019995524322835034, "loss": 2.2885, "step": 48 }, { "epoch": 0.049, "grad_norm": 0.526339054107666, "learning_rate": 0.00019995285680133394, "loss": 2.408, "step": 49 }, { "epoch": 0.05, "grad_norm": 0.6333803534507751, "learning_rate": 0.00019995040840893388, "loss": 2.4391, "step": 50 }, { "epoch": 0.05, "eval_loss": 2.580864906311035, "eval_runtime": 4.8038, "eval_samples_per_second": 4.372, "eval_steps_per_second": 4.372, "step": 50 } ], "logging_steps": 1, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4108715871436800.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }