{ "best_metric": 1.2021186351776123, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 3.0372881355932204, "eval_steps": 25, "global_step": 70, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04338983050847458, "grad_norm": 0.19333525002002716, "learning_rate": 0.0001, "loss": 1.4844, "step": 1 }, { "epoch": 0.04338983050847458, "eval_loss": 1.57308030128479, "eval_runtime": 1.681, "eval_samples_per_second": 29.744, "eval_steps_per_second": 7.733, "step": 1 }, { "epoch": 0.08677966101694916, "grad_norm": 0.22077158093452454, "learning_rate": 0.0002, "loss": 1.5603, "step": 2 }, { "epoch": 0.13016949152542373, "grad_norm": 0.258558452129364, "learning_rate": 0.00019990396773221354, "loss": 1.4966, "step": 3 }, { "epoch": 0.17355932203389832, "grad_norm": 0.2679038345813751, "learning_rate": 0.00019961607586655312, "loss": 1.4893, "step": 4 }, { "epoch": 0.21694915254237288, "grad_norm": 0.2787069082260132, "learning_rate": 0.00019913693877876847, "loss": 1.5337, "step": 5 }, { "epoch": 0.26033898305084746, "grad_norm": 0.21714474260807037, "learning_rate": 0.00019846757897155117, "loss": 1.4605, "step": 6 }, { "epoch": 0.303728813559322, "grad_norm": 0.2101472020149231, "learning_rate": 0.00019760942489246237, "loss": 1.4191, "step": 7 }, { "epoch": 0.34711864406779663, "grad_norm": 0.2357436865568161, "learning_rate": 0.00019656430788555373, "loss": 1.3739, "step": 8 }, { "epoch": 0.3905084745762712, "grad_norm": 0.22543178498744965, "learning_rate": 0.00019533445828318692, "loss": 1.3078, "step": 9 }, { "epoch": 0.43389830508474575, "grad_norm": 0.2066933512687683, "learning_rate": 0.000193922500646392, "loss": 1.3297, "step": 10 }, { "epoch": 0.47728813559322036, "grad_norm": 0.2858045995235443, "learning_rate": 0.0001923314481639219, "loss": 1.4037, "step": 11 }, { "epoch": 0.5206779661016949, "grad_norm": 0.3108825385570526, "learning_rate": 0.00019056469622195563, "loss": 1.3338, "step": 12 }, { "epoch": 0.5640677966101695, "grad_norm": 0.2868238091468811, "learning_rate": 0.00018862601515817325, "loss": 1.3208, "step": 13 }, { "epoch": 0.607457627118644, "grad_norm": 0.1842653751373291, "learning_rate": 0.00018651954221566528, "loss": 1.2517, "step": 14 }, { "epoch": 0.6508474576271186, "grad_norm": 0.23613254725933075, "learning_rate": 0.0001842497727138472, "loss": 1.2301, "step": 15 }, { "epoch": 0.6942372881355933, "grad_norm": 0.27285611629486084, "learning_rate": 0.00018182155045522159, "loss": 1.2618, "step": 16 }, { "epoch": 0.7376271186440678, "grad_norm": 0.28099900484085083, "learning_rate": 0.00017924005738845914, "loss": 1.3992, "step": 17 }, { "epoch": 0.7810169491525424, "grad_norm": 0.20399919152259827, "learning_rate": 0.00017651080254985933, "loss": 1.3072, "step": 18 }, { "epoch": 0.8244067796610169, "grad_norm": 0.21079455316066742, "learning_rate": 0.00017363961030678927, "loss": 1.3045, "step": 19 }, { "epoch": 0.8677966101694915, "grad_norm": 0.19453535974025726, "learning_rate": 0.00017063260792819014, "loss": 1.194, "step": 20 }, { "epoch": 0.9111864406779661, "grad_norm": 0.15201877057552338, "learning_rate": 0.0001674962125086765, "loss": 1.214, "step": 21 }, { "epoch": 0.9545762711864407, "grad_norm": 0.13938187062740326, "learning_rate": 0.00016423711727413308, "loss": 1.2856, "step": 22 }, { "epoch": 0.9979661016949153, "grad_norm": 0.2446557730436325, "learning_rate": 0.00016086227729803328, "loss": 1.427, "step": 23 }, { "epoch": 1.0413559322033898, "grad_norm": 0.4381452202796936, "learning_rate": 0.00015737889465896202, "loss": 2.4556, "step": 24 }, { "epoch": 1.0847457627118644, "grad_norm": 0.13573524355888367, "learning_rate": 0.00015379440307101706, "loss": 1.2265, "step": 25 }, { "epoch": 1.0847457627118644, "eval_loss": 1.2588658332824707, "eval_runtime": 1.6779, "eval_samples_per_second": 29.799, "eval_steps_per_second": 7.748, "step": 25 }, { "epoch": 1.128135593220339, "grad_norm": 0.14407069981098175, "learning_rate": 0.00015011645201988848, "loss": 1.1929, "step": 26 }, { "epoch": 1.1715254237288135, "grad_norm": 0.15989331901073456, "learning_rate": 0.00014635289043847053, "loss": 1.1703, "step": 27 }, { "epoch": 1.214915254237288, "grad_norm": 0.16030077636241913, "learning_rate": 0.0001425117499568438, "loss": 1.2557, "step": 28 }, { "epoch": 1.2583050847457626, "grad_norm": 0.18936704099178314, "learning_rate": 0.00013860122776237113, "loss": 1.2149, "step": 29 }, { "epoch": 1.3016949152542372, "grad_norm": 0.24135200679302216, "learning_rate": 0.0001346296691064875, "loss": 1.3461, "step": 30 }, { "epoch": 1.345084745762712, "grad_norm": 0.17525914311408997, "learning_rate": 0.00013060554949551208, "loss": 1.1852, "step": 31 }, { "epoch": 1.3884745762711863, "grad_norm": 0.13595549762248993, "learning_rate": 0.00012653745660349134, "loss": 1.1576, "step": 32 }, { "epoch": 1.431864406779661, "grad_norm": 0.18319641053676605, "learning_rate": 0.0001224340719456694, "loss": 1.1778, "step": 33 }, { "epoch": 1.4752542372881357, "grad_norm": 0.21508461236953735, "learning_rate": 0.00011830415235169719, "loss": 1.3049, "step": 34 }, { "epoch": 1.5186440677966102, "grad_norm": 0.18303513526916504, "learning_rate": 0.00011415651127811657, "loss": 1.1947, "step": 35 }, { "epoch": 1.5620338983050848, "grad_norm": 0.17415650188922882, "learning_rate": 0.00011000000000000002, "loss": 1.2696, "step": 36 }, { "epoch": 1.6054237288135593, "grad_norm": 0.16625842452049255, "learning_rate": 0.00010584348872188346, "loss": 1.1646, "step": 37 }, { "epoch": 1.6488135593220339, "grad_norm": 0.17237333953380585, "learning_rate": 0.00010169584764830285, "loss": 1.1039, "step": 38 }, { "epoch": 1.6922033898305084, "grad_norm": 0.17974147200584412, "learning_rate": 9.756592805433063e-05, "loss": 1.1694, "step": 39 }, { "epoch": 1.735593220338983, "grad_norm": 0.23319189250469208, "learning_rate": 9.346254339650866e-05, "loss": 1.3278, "step": 40 }, { "epoch": 1.7789830508474576, "grad_norm": 0.219391867518425, "learning_rate": 8.939445050448795e-05, "loss": 1.2026, "step": 41 }, { "epoch": 1.8223728813559323, "grad_norm": 0.23197093605995178, "learning_rate": 8.537033089351256e-05, "loss": 1.2161, "step": 42 }, { "epoch": 1.8657627118644067, "grad_norm": 0.16063331067562103, "learning_rate": 8.139877223762886e-05, "loss": 1.1265, "step": 43 }, { "epoch": 1.9091525423728815, "grad_norm": 0.1530478149652481, "learning_rate": 7.748825004315625e-05, "loss": 1.1452, "step": 44 }, { "epoch": 1.9525423728813558, "grad_norm": 0.19677256047725677, "learning_rate": 7.364710956152947e-05, "loss": 1.2146, "step": 45 }, { "epoch": 1.9959322033898306, "grad_norm": 0.26235055923461914, "learning_rate": 6.988354798011156e-05, "loss": 1.3677, "step": 46 }, { "epoch": 2.039322033898305, "grad_norm": 0.5380977988243103, "learning_rate": 6.620559692898295e-05, "loss": 2.2549, "step": 47 }, { "epoch": 2.0827118644067797, "grad_norm": 0.14503780007362366, "learning_rate": 6.262110534103797e-05, "loss": 1.1591, "step": 48 }, { "epoch": 2.1261016949152545, "grad_norm": 0.13808022439479828, "learning_rate": 5.913772270196674e-05, "loss": 1.1382, "step": 49 }, { "epoch": 2.169491525423729, "grad_norm": 0.1822243332862854, "learning_rate": 5.5762882725866936e-05, "loss": 1.1082, "step": 50 }, { "epoch": 2.169491525423729, "eval_loss": 1.2021186351776123, "eval_runtime": 1.679, "eval_samples_per_second": 29.779, "eval_steps_per_second": 7.743, "step": 50 }, { "epoch": 2.2128813559322036, "grad_norm": 0.208404541015625, "learning_rate": 5.250378749132351e-05, "loss": 1.235, "step": 51 }, { "epoch": 2.256271186440678, "grad_norm": 0.17768944799900055, "learning_rate": 4.9367392071809857e-05, "loss": 1.1382, "step": 52 }, { "epoch": 2.2996610169491527, "grad_norm": 0.2600400447845459, "learning_rate": 4.6360389693210735e-05, "loss": 1.3375, "step": 53 }, { "epoch": 2.343050847457627, "grad_norm": 0.17488409578800201, "learning_rate": 4.348919745014069e-05, "loss": 1.1467, "step": 54 }, { "epoch": 2.386440677966102, "grad_norm": 0.1583094447851181, "learning_rate": 4.07599426115409e-05, "loss": 1.0945, "step": 55 }, { "epoch": 2.429830508474576, "grad_norm": 0.18399091064929962, "learning_rate": 3.817844954477845e-05, "loss": 1.1229, "step": 56 }, { "epoch": 2.473220338983051, "grad_norm": 0.21471183001995087, "learning_rate": 3.5750227286152804e-05, "loss": 1.2722, "step": 57 }, { "epoch": 2.5166101694915253, "grad_norm": 0.17655105888843536, "learning_rate": 3.348045778433474e-05, "loss": 1.1581, "step": 58 }, { "epoch": 2.56, "grad_norm": 0.1815287321805954, "learning_rate": 3.137398484182675e-05, "loss": 1.2125, "step": 59 }, { "epoch": 2.6033898305084744, "grad_norm": 0.14957380294799805, "learning_rate": 2.9435303778044405e-05, "loss": 1.1048, "step": 60 }, { "epoch": 2.646779661016949, "grad_norm": 0.16781945526599884, "learning_rate": 2.766855183607811e-05, "loss": 1.0719, "step": 61 }, { "epoch": 2.690169491525424, "grad_norm": 0.20620030164718628, "learning_rate": 2.6077499353607988e-05, "loss": 1.1494, "step": 62 }, { "epoch": 2.7335593220338983, "grad_norm": 0.2556055784225464, "learning_rate": 2.4665541716813075e-05, "loss": 1.2749, "step": 63 }, { "epoch": 2.7769491525423726, "grad_norm": 0.21309202909469604, "learning_rate": 2.343569211444629e-05, "loss": 1.1472, "step": 64 }, { "epoch": 2.8203389830508474, "grad_norm": 0.18439190089702606, "learning_rate": 2.2390575107537647e-05, "loss": 1.2353, "step": 65 }, { "epoch": 2.863728813559322, "grad_norm": 0.1528278887271881, "learning_rate": 2.153242102844884e-05, "loss": 1.118, "step": 66 }, { "epoch": 2.9071186440677965, "grad_norm": 0.14112654328346252, "learning_rate": 2.0863061221231545e-05, "loss": 1.1025, "step": 67 }, { "epoch": 2.9505084745762713, "grad_norm": 0.17627808451652527, "learning_rate": 2.03839241334469e-05, "loss": 1.142, "step": 68 }, { "epoch": 2.9938983050847456, "grad_norm": 0.2500801682472229, "learning_rate": 2.0096032267786484e-05, "loss": 1.3468, "step": 69 }, { "epoch": 3.0372881355932204, "grad_norm": 0.5967485308647156, "learning_rate": 2e-05, "loss": 2.3796, "step": 70 } ], "logging_steps": 1, "max_steps": 70, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 60, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2467356978642944e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }