|
{ |
|
"best_metric": 10.301432609558105, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-500", |
|
"epoch": 0.01404908751176611, |
|
"eval_steps": 50, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.809817502353222e-05, |
|
"eval_loss": 10.378629684448242, |
|
"eval_runtime": 60.0386, |
|
"eval_samples_per_second": 249.589, |
|
"eval_steps_per_second": 62.41, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0002809817502353222, |
|
"grad_norm": 0.19126524031162262, |
|
"learning_rate": 4.0600000000000004e-05, |
|
"loss": 20.7601, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0005619635004706444, |
|
"grad_norm": 0.1812351495027542, |
|
"learning_rate": 8.120000000000001e-05, |
|
"loss": 20.7579, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0008429452507059666, |
|
"grad_norm": 0.21598900854587555, |
|
"learning_rate": 0.00012179999999999999, |
|
"loss": 20.7557, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0011239270009412888, |
|
"grad_norm": 0.29655301570892334, |
|
"learning_rate": 0.00016240000000000002, |
|
"loss": 20.7436, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.001404908751176611, |
|
"grad_norm": 0.3212806284427643, |
|
"learning_rate": 0.000203, |
|
"loss": 20.7365, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.001404908751176611, |
|
"eval_loss": 10.362621307373047, |
|
"eval_runtime": 60.4081, |
|
"eval_samples_per_second": 248.063, |
|
"eval_steps_per_second": 62.028, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0016858905014119332, |
|
"grad_norm": 0.4301055669784546, |
|
"learning_rate": 0.00020275275110137215, |
|
"loss": 20.7188, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0019668722516472557, |
|
"grad_norm": 0.3765835165977478, |
|
"learning_rate": 0.00020201220897726938, |
|
"loss": 20.6809, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0022478540018825775, |
|
"grad_norm": 0.3050801157951355, |
|
"learning_rate": 0.00020078198147448128, |
|
"loss": 20.6434, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0025288357521178998, |
|
"grad_norm": 0.31923723220825195, |
|
"learning_rate": 0.00019906806213773937, |
|
"loss": 20.6134, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.002809817502353222, |
|
"grad_norm": 0.3668956160545349, |
|
"learning_rate": 0.0001968788010097697, |
|
"loss": 20.6138, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.002809817502353222, |
|
"eval_loss": 10.308439254760742, |
|
"eval_runtime": 59.3542, |
|
"eval_samples_per_second": 252.467, |
|
"eval_steps_per_second": 63.129, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0030907992525885442, |
|
"grad_norm": 0.20467126369476318, |
|
"learning_rate": 0.00019422486395072398, |
|
"loss": 20.6304, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0033717810028238665, |
|
"grad_norm": 0.2518371343612671, |
|
"learning_rate": 0.0001911191806751811, |
|
"loss": 20.6217, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0036527627530591887, |
|
"grad_norm": 0.1735089272260666, |
|
"learning_rate": 0.00018757688175987723, |
|
"loss": 20.6238, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.003933744503294511, |
|
"grad_norm": 0.17584048211574554, |
|
"learning_rate": 0.00018361522492905716, |
|
"loss": 20.6006, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.004214726253529834, |
|
"grad_norm": 0.20633459091186523, |
|
"learning_rate": 0.00017925351097657625, |
|
"loss": 20.6024, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.004214726253529834, |
|
"eval_loss": 10.306268692016602, |
|
"eval_runtime": 59.8224, |
|
"eval_samples_per_second": 250.492, |
|
"eval_steps_per_second": 62.635, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.004495708003765155, |
|
"grad_norm": 0.18568071722984314, |
|
"learning_rate": 0.00017451298973437308, |
|
"loss": 20.6288, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.004776689754000477, |
|
"grad_norm": 0.2910362184047699, |
|
"learning_rate": 0.0001694167565454241, |
|
"loss": 20.611, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0050576715042357995, |
|
"grad_norm": 0.2217985987663269, |
|
"learning_rate": 0.0001639896397455543, |
|
"loss": 20.618, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.005338653254471122, |
|
"grad_norm": 0.19911141693592072, |
|
"learning_rate": 0.0001582580797022808, |
|
"loss": 20.6091, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.005619635004706444, |
|
"grad_norm": 0.28544291853904724, |
|
"learning_rate": 0.00015225, |
|
"loss": 20.6026, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.005619635004706444, |
|
"eval_loss": 10.305092811584473, |
|
"eval_runtime": 60.5578, |
|
"eval_samples_per_second": 247.45, |
|
"eval_steps_per_second": 61.875, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.005900616754941766, |
|
"grad_norm": 0.21859851479530334, |
|
"learning_rate": 0.00014599467139909136, |
|
"loss": 20.6277, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.0061815985051770885, |
|
"grad_norm": 0.2270508110523224, |
|
"learning_rate": 0.0001395225692317151, |
|
"loss": 20.6168, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.006462580255412411, |
|
"grad_norm": 0.17180295288562775, |
|
"learning_rate": 0.00013286522492905717, |
|
"loss": 20.6051, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.006743562005647733, |
|
"grad_norm": 0.2308344542980194, |
|
"learning_rate": 0.00012605507240336626, |
|
"loss": 20.6047, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.007024543755883055, |
|
"grad_norm": 0.2163165658712387, |
|
"learning_rate": 0.00011912529003319345, |
|
"loss": 20.5976, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.007024543755883055, |
|
"eval_loss": 10.303328514099121, |
|
"eval_runtime": 60.606, |
|
"eval_samples_per_second": 247.253, |
|
"eval_steps_per_second": 61.826, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0073055255061183775, |
|
"grad_norm": 0.1643211990594864, |
|
"learning_rate": 0.00011210963902166683, |
|
"loss": 20.6166, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0075865072563537, |
|
"grad_norm": 0.13470244407653809, |
|
"learning_rate": 0.00010504229891530386, |
|
"loss": 20.6187, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.007867489006589023, |
|
"grad_norm": 0.15561600029468536, |
|
"learning_rate": 9.795770108469618e-05, |
|
"loss": 20.5993, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.008148470756824344, |
|
"grad_norm": 0.1637904942035675, |
|
"learning_rate": 9.08903609783332e-05, |
|
"loss": 20.6033, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.008429452507059667, |
|
"grad_norm": 0.2457241415977478, |
|
"learning_rate": 8.387470996680658e-05, |
|
"loss": 20.581, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.008429452507059667, |
|
"eval_loss": 10.302334785461426, |
|
"eval_runtime": 59.2759, |
|
"eval_samples_per_second": 252.801, |
|
"eval_steps_per_second": 63.213, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.008710434257294989, |
|
"grad_norm": 0.1403207629919052, |
|
"learning_rate": 7.694492759663374e-05, |
|
"loss": 20.6175, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.00899141600753031, |
|
"grad_norm": 0.1851751208305359, |
|
"learning_rate": 7.013477507094284e-05, |
|
"loss": 20.6056, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.009272397757765633, |
|
"grad_norm": 0.23650233447551727, |
|
"learning_rate": 6.347743076828492e-05, |
|
"loss": 20.6097, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.009553379508000955, |
|
"grad_norm": 0.2193847894668579, |
|
"learning_rate": 5.700532860090863e-05, |
|
"loss": 20.5948, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.009834361258236278, |
|
"grad_norm": 0.2748495638370514, |
|
"learning_rate": 5.075000000000002e-05, |
|
"loss": 20.5916, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.009834361258236278, |
|
"eval_loss": 10.30180835723877, |
|
"eval_runtime": 60.3366, |
|
"eval_samples_per_second": 248.357, |
|
"eval_steps_per_second": 62.102, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.010115343008471599, |
|
"grad_norm": 0.2530926764011383, |
|
"learning_rate": 4.4741920297719214e-05, |
|
"loss": 20.6187, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.010396324758706922, |
|
"grad_norm": 0.18327485024929047, |
|
"learning_rate": 3.901036025444568e-05, |
|
"loss": 20.6102, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.010677306508942244, |
|
"grad_norm": 0.1382702887058258, |
|
"learning_rate": 3.358324345457592e-05, |
|
"loss": 20.6094, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.010958288259177567, |
|
"grad_norm": 0.19693830609321594, |
|
"learning_rate": 2.8487010265626928e-05, |
|
"loss": 20.5886, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.011239270009412888, |
|
"grad_norm": 0.26368966698646545, |
|
"learning_rate": 2.3746489023423744e-05, |
|
"loss": 20.5852, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.011239270009412888, |
|
"eval_loss": 10.301532745361328, |
|
"eval_runtime": 59.6237, |
|
"eval_samples_per_second": 251.326, |
|
"eval_steps_per_second": 62.844, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.011520251759648211, |
|
"grad_norm": 0.15768560767173767, |
|
"learning_rate": 1.9384775070942844e-05, |
|
"loss": 20.6082, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.011801233509883532, |
|
"grad_norm": 0.19848321378231049, |
|
"learning_rate": 1.5423118240122765e-05, |
|
"loss": 20.5956, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.012082215260118856, |
|
"grad_norm": 0.14942528307437897, |
|
"learning_rate": 1.188081932481891e-05, |
|
"loss": 20.6074, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.012363197010354177, |
|
"grad_norm": 0.21457645297050476, |
|
"learning_rate": 8.775136049276001e-06, |
|
"loss": 20.5856, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.0126441787605895, |
|
"grad_norm": 0.3364667296409607, |
|
"learning_rate": 6.121198990230306e-06, |
|
"loss": 20.5984, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.0126441787605895, |
|
"eval_loss": 10.30145263671875, |
|
"eval_runtime": 60.1852, |
|
"eval_samples_per_second": 248.982, |
|
"eval_steps_per_second": 62.258, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.012925160510824821, |
|
"grad_norm": 0.16703709959983826, |
|
"learning_rate": 3.931937862260632e-06, |
|
"loss": 20.6069, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.013206142261060145, |
|
"grad_norm": 0.19127300381660461, |
|
"learning_rate": 2.2180185255187225e-06, |
|
"loss": 20.6087, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.013487124011295466, |
|
"grad_norm": 0.19735532999038696, |
|
"learning_rate": 9.877910227306082e-07, |
|
"loss": 20.6209, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.013768105761530789, |
|
"grad_norm": 0.1791822463274002, |
|
"learning_rate": 2.472488986278439e-07, |
|
"loss": 20.5952, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.01404908751176611, |
|
"grad_norm": 0.20496730506420135, |
|
"learning_rate": 0.0, |
|
"loss": 20.5889, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01404908751176611, |
|
"eval_loss": 10.301432609558105, |
|
"eval_runtime": 60.1498, |
|
"eval_samples_per_second": 249.128, |
|
"eval_steps_per_second": 62.294, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 21494130278400.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|