|
{ |
|
"best_metric": 1.2021186351776123, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 3.0372881355932204, |
|
"eval_steps": 25, |
|
"global_step": 70, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04338983050847458, |
|
"grad_norm": 0.19333525002002716, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4844, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04338983050847458, |
|
"eval_loss": 1.57308030128479, |
|
"eval_runtime": 1.681, |
|
"eval_samples_per_second": 29.744, |
|
"eval_steps_per_second": 7.733, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08677966101694916, |
|
"grad_norm": 0.22077158093452454, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5603, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.13016949152542373, |
|
"grad_norm": 0.258558452129364, |
|
"learning_rate": 0.00019990396773221354, |
|
"loss": 1.4966, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.17355932203389832, |
|
"grad_norm": 0.2679038345813751, |
|
"learning_rate": 0.00019961607586655312, |
|
"loss": 1.4893, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.21694915254237288, |
|
"grad_norm": 0.2787069082260132, |
|
"learning_rate": 0.00019913693877876847, |
|
"loss": 1.5337, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.26033898305084746, |
|
"grad_norm": 0.21714474260807037, |
|
"learning_rate": 0.00019846757897155117, |
|
"loss": 1.4605, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.303728813559322, |
|
"grad_norm": 0.2101472020149231, |
|
"learning_rate": 0.00019760942489246237, |
|
"loss": 1.4191, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.34711864406779663, |
|
"grad_norm": 0.2357436865568161, |
|
"learning_rate": 0.00019656430788555373, |
|
"loss": 1.3739, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.3905084745762712, |
|
"grad_norm": 0.22543178498744965, |
|
"learning_rate": 0.00019533445828318692, |
|
"loss": 1.3078, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.43389830508474575, |
|
"grad_norm": 0.2066933512687683, |
|
"learning_rate": 0.000193922500646392, |
|
"loss": 1.3297, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.47728813559322036, |
|
"grad_norm": 0.2858045995235443, |
|
"learning_rate": 0.0001923314481639219, |
|
"loss": 1.4037, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.5206779661016949, |
|
"grad_norm": 0.3108825385570526, |
|
"learning_rate": 0.00019056469622195563, |
|
"loss": 1.3338, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.5640677966101695, |
|
"grad_norm": 0.2868238091468811, |
|
"learning_rate": 0.00018862601515817325, |
|
"loss": 1.3208, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.607457627118644, |
|
"grad_norm": 0.1842653751373291, |
|
"learning_rate": 0.00018651954221566528, |
|
"loss": 1.2517, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.6508474576271186, |
|
"grad_norm": 0.23613254725933075, |
|
"learning_rate": 0.0001842497727138472, |
|
"loss": 1.2301, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.6942372881355933, |
|
"grad_norm": 0.27285611629486084, |
|
"learning_rate": 0.00018182155045522159, |
|
"loss": 1.2618, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.7376271186440678, |
|
"grad_norm": 0.28099900484085083, |
|
"learning_rate": 0.00017924005738845914, |
|
"loss": 1.3992, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.7810169491525424, |
|
"grad_norm": 0.20399919152259827, |
|
"learning_rate": 0.00017651080254985933, |
|
"loss": 1.3072, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.8244067796610169, |
|
"grad_norm": 0.21079455316066742, |
|
"learning_rate": 0.00017363961030678927, |
|
"loss": 1.3045, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.8677966101694915, |
|
"grad_norm": 0.19453535974025726, |
|
"learning_rate": 0.00017063260792819014, |
|
"loss": 1.194, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9111864406779661, |
|
"grad_norm": 0.15201877057552338, |
|
"learning_rate": 0.0001674962125086765, |
|
"loss": 1.214, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.9545762711864407, |
|
"grad_norm": 0.13938187062740326, |
|
"learning_rate": 0.00016423711727413308, |
|
"loss": 1.2856, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.9979661016949153, |
|
"grad_norm": 0.2446557730436325, |
|
"learning_rate": 0.00016086227729803328, |
|
"loss": 1.427, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.0413559322033898, |
|
"grad_norm": 0.4381452202796936, |
|
"learning_rate": 0.00015737889465896202, |
|
"loss": 2.4556, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.0847457627118644, |
|
"grad_norm": 0.13573524355888367, |
|
"learning_rate": 0.00015379440307101706, |
|
"loss": 1.2265, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.0847457627118644, |
|
"eval_loss": 1.2588658332824707, |
|
"eval_runtime": 1.6779, |
|
"eval_samples_per_second": 29.799, |
|
"eval_steps_per_second": 7.748, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.128135593220339, |
|
"grad_norm": 0.14407069981098175, |
|
"learning_rate": 0.00015011645201988848, |
|
"loss": 1.1929, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.1715254237288135, |
|
"grad_norm": 0.15989331901073456, |
|
"learning_rate": 0.00014635289043847053, |
|
"loss": 1.1703, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.214915254237288, |
|
"grad_norm": 0.16030077636241913, |
|
"learning_rate": 0.0001425117499568438, |
|
"loss": 1.2557, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.2583050847457626, |
|
"grad_norm": 0.18936704099178314, |
|
"learning_rate": 0.00013860122776237113, |
|
"loss": 1.2149, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.3016949152542372, |
|
"grad_norm": 0.24135200679302216, |
|
"learning_rate": 0.0001346296691064875, |
|
"loss": 1.3461, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.345084745762712, |
|
"grad_norm": 0.17525914311408997, |
|
"learning_rate": 0.00013060554949551208, |
|
"loss": 1.1852, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.3884745762711863, |
|
"grad_norm": 0.13595549762248993, |
|
"learning_rate": 0.00012653745660349134, |
|
"loss": 1.1576, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.431864406779661, |
|
"grad_norm": 0.18319641053676605, |
|
"learning_rate": 0.0001224340719456694, |
|
"loss": 1.1778, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.4752542372881357, |
|
"grad_norm": 0.21508461236953735, |
|
"learning_rate": 0.00011830415235169719, |
|
"loss": 1.3049, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.5186440677966102, |
|
"grad_norm": 0.18303513526916504, |
|
"learning_rate": 0.00011415651127811657, |
|
"loss": 1.1947, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.5620338983050848, |
|
"grad_norm": 0.17415650188922882, |
|
"learning_rate": 0.00011000000000000002, |
|
"loss": 1.2696, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.6054237288135593, |
|
"grad_norm": 0.16625842452049255, |
|
"learning_rate": 0.00010584348872188346, |
|
"loss": 1.1646, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 1.6488135593220339, |
|
"grad_norm": 0.17237333953380585, |
|
"learning_rate": 0.00010169584764830285, |
|
"loss": 1.1039, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.6922033898305084, |
|
"grad_norm": 0.17974147200584412, |
|
"learning_rate": 9.756592805433063e-05, |
|
"loss": 1.1694, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 1.735593220338983, |
|
"grad_norm": 0.23319189250469208, |
|
"learning_rate": 9.346254339650866e-05, |
|
"loss": 1.3278, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.7789830508474576, |
|
"grad_norm": 0.219391867518425, |
|
"learning_rate": 8.939445050448795e-05, |
|
"loss": 1.2026, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 1.8223728813559323, |
|
"grad_norm": 0.23197093605995178, |
|
"learning_rate": 8.537033089351256e-05, |
|
"loss": 1.2161, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.8657627118644067, |
|
"grad_norm": 0.16063331067562103, |
|
"learning_rate": 8.139877223762886e-05, |
|
"loss": 1.1265, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.9091525423728815, |
|
"grad_norm": 0.1530478149652481, |
|
"learning_rate": 7.748825004315625e-05, |
|
"loss": 1.1452, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.9525423728813558, |
|
"grad_norm": 0.19677256047725677, |
|
"learning_rate": 7.364710956152947e-05, |
|
"loss": 1.2146, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.9959322033898306, |
|
"grad_norm": 0.26235055923461914, |
|
"learning_rate": 6.988354798011156e-05, |
|
"loss": 1.3677, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.039322033898305, |
|
"grad_norm": 0.5380977988243103, |
|
"learning_rate": 6.620559692898295e-05, |
|
"loss": 2.2549, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 2.0827118644067797, |
|
"grad_norm": 0.14503780007362366, |
|
"learning_rate": 6.262110534103797e-05, |
|
"loss": 1.1591, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.1261016949152545, |
|
"grad_norm": 0.13808022439479828, |
|
"learning_rate": 5.913772270196674e-05, |
|
"loss": 1.1382, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 2.169491525423729, |
|
"grad_norm": 0.1822243332862854, |
|
"learning_rate": 5.5762882725866936e-05, |
|
"loss": 1.1082, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.169491525423729, |
|
"eval_loss": 1.2021186351776123, |
|
"eval_runtime": 1.679, |
|
"eval_samples_per_second": 29.779, |
|
"eval_steps_per_second": 7.743, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.2128813559322036, |
|
"grad_norm": 0.208404541015625, |
|
"learning_rate": 5.250378749132351e-05, |
|
"loss": 1.235, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 2.256271186440678, |
|
"grad_norm": 0.17768944799900055, |
|
"learning_rate": 4.9367392071809857e-05, |
|
"loss": 1.1382, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 2.2996610169491527, |
|
"grad_norm": 0.2600400447845459, |
|
"learning_rate": 4.6360389693210735e-05, |
|
"loss": 1.3375, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 2.343050847457627, |
|
"grad_norm": 0.17488409578800201, |
|
"learning_rate": 4.348919745014069e-05, |
|
"loss": 1.1467, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 2.386440677966102, |
|
"grad_norm": 0.1583094447851181, |
|
"learning_rate": 4.07599426115409e-05, |
|
"loss": 1.0945, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.429830508474576, |
|
"grad_norm": 0.18399091064929962, |
|
"learning_rate": 3.817844954477845e-05, |
|
"loss": 1.1229, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 2.473220338983051, |
|
"grad_norm": 0.21471183001995087, |
|
"learning_rate": 3.5750227286152804e-05, |
|
"loss": 1.2722, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 2.5166101694915253, |
|
"grad_norm": 0.17655105888843536, |
|
"learning_rate": 3.348045778433474e-05, |
|
"loss": 1.1581, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.1815287321805954, |
|
"learning_rate": 3.137398484182675e-05, |
|
"loss": 1.2125, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 2.6033898305084744, |
|
"grad_norm": 0.14957380294799805, |
|
"learning_rate": 2.9435303778044405e-05, |
|
"loss": 1.1048, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.646779661016949, |
|
"grad_norm": 0.16781945526599884, |
|
"learning_rate": 2.766855183607811e-05, |
|
"loss": 1.0719, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 2.690169491525424, |
|
"grad_norm": 0.20620030164718628, |
|
"learning_rate": 2.6077499353607988e-05, |
|
"loss": 1.1494, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 2.7335593220338983, |
|
"grad_norm": 0.2556055784225464, |
|
"learning_rate": 2.4665541716813075e-05, |
|
"loss": 1.2749, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 2.7769491525423726, |
|
"grad_norm": 0.21309202909469604, |
|
"learning_rate": 2.343569211444629e-05, |
|
"loss": 1.1472, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 2.8203389830508474, |
|
"grad_norm": 0.18439190089702606, |
|
"learning_rate": 2.2390575107537647e-05, |
|
"loss": 1.2353, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 2.863728813559322, |
|
"grad_norm": 0.1528278887271881, |
|
"learning_rate": 2.153242102844884e-05, |
|
"loss": 1.118, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 2.9071186440677965, |
|
"grad_norm": 0.14112654328346252, |
|
"learning_rate": 2.0863061221231545e-05, |
|
"loss": 1.1025, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 2.9505084745762713, |
|
"grad_norm": 0.17627808451652527, |
|
"learning_rate": 2.03839241334469e-05, |
|
"loss": 1.142, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 2.9938983050847456, |
|
"grad_norm": 0.2500801682472229, |
|
"learning_rate": 2.0096032267786484e-05, |
|
"loss": 1.3468, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 3.0372881355932204, |
|
"grad_norm": 0.5967485308647156, |
|
"learning_rate": 2e-05, |
|
"loss": 2.3796, |
|
"step": 70 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 70, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 60, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2467356978642944e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|