Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fb408537b3e3d504b979dffac54d392679a1d11cf142d399473aec3dd6928026
 size 201892112

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d973b97867ef76473331d34825d487f2d9e8af84cb65d946c99e6cd202e7ad5
 size 201892112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8ce5febb753c7d438869a7aa90846dd40ed4dba2c8c6dc274cd0a9f957f6a1b
 size 102864548

 version https://git-lfs.github.com/spec/v1
+oid sha256:f865acd2f46dbf046605b28ddd00a46c7aa84dd8dbe1d104d8aaa5a5b65da011
 size 102864548

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd81b0b36f3faa4623abbd1c809d10ef711912da602085e3df863933cf53913b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d8ef8f02f8e5353e456df7d5019593bbf407d3ac71f7d751ba345432c8b1fb7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48a1533051e4bee653afc683a4359c329f95831c0354ae8442616cabf80d0caa
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5efc880080e0854e5765c7a5ea108f46d0ccbf30b054c9bbb5a2162a1c5babf
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.2072490453720093,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.03834355828220859,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 33.958,
       "eval_steps_per_second": 8.489,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7999304446771200.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.1849799156188965,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.05112474437627812,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 33.958,
       "eval_steps_per_second": 8.489,
       "step": 150
+    },
+    {
+      "epoch": 0.03859918200408998,
+      "grad_norm": 0.7869307994842529,
+      "learning_rate": 2.5815263157894736e-05,
+      "loss": 1.1642,
+      "step": 151
+    },
+    {
+      "epoch": 0.03885480572597137,
+      "grad_norm": 0.9389914274215698,
+      "learning_rate": 2.528842105263158e-05,
+      "loss": 1.2456,
+      "step": 152
+    },
+    {
+      "epoch": 0.039110429447852764,
+      "grad_norm": 1.0959888696670532,
+      "learning_rate": 2.4761578947368418e-05,
+      "loss": 1.2292,
+      "step": 153
+    },
+    {
+      "epoch": 0.03936605316973415,
+      "grad_norm": 0.9445658326148987,
+      "learning_rate": 2.423473684210526e-05,
+      "loss": 1.3019,
+      "step": 154
+    },
+    {
+      "epoch": 0.03962167689161554,
+      "grad_norm": 1.1817337274551392,
+      "learning_rate": 2.3707894736842103e-05,
+      "loss": 1.4457,
+      "step": 155
+    },
+    {
+      "epoch": 0.03987730061349693,
+      "grad_norm": 0.9472606778144836,
+      "learning_rate": 2.3181052631578946e-05,
+      "loss": 1.405,
+      "step": 156
+    },
+    {
+      "epoch": 0.04013292433537832,
+      "grad_norm": 1.0863560438156128,
+      "learning_rate": 2.265421052631579e-05,
+      "loss": 1.4356,
+      "step": 157
+    },
+    {
+      "epoch": 0.040388548057259714,
+      "grad_norm": 0.9559451341629028,
+      "learning_rate": 2.212736842105263e-05,
+      "loss": 1.4123,
+      "step": 158
+    },
+    {
+      "epoch": 0.040644171779141106,
+      "grad_norm": 0.9859912395477295,
+      "learning_rate": 2.1600526315789474e-05,
+      "loss": 1.1852,
+      "step": 159
+    },
+    {
+      "epoch": 0.0408997955010225,
+      "grad_norm": 1.0700509548187256,
+      "learning_rate": 2.1073684210526313e-05,
+      "loss": 1.3571,
+      "step": 160
+    },
+    {
+      "epoch": 0.04115541922290389,
+      "grad_norm": 1.1035226583480835,
+      "learning_rate": 2.0546842105263155e-05,
+      "loss": 1.3699,
+      "step": 161
+    },
+    {
+      "epoch": 0.04141104294478527,
+      "grad_norm": 1.0564489364624023,
+      "learning_rate": 2.002e-05,
+      "loss": 1.1892,
+      "step": 162
+    },
+    {
+      "epoch": 0.041666666666666664,
+      "grad_norm": 0.9929437637329102,
+      "learning_rate": 1.949315789473684e-05,
+      "loss": 1.0838,
+      "step": 163
+    },
+    {
+      "epoch": 0.041922290388548056,
+      "grad_norm": 1.1558207273483276,
+      "learning_rate": 1.8966315789473683e-05,
+      "loss": 1.2928,
+      "step": 164
+    },
+    {
+      "epoch": 0.04217791411042945,
+      "grad_norm": 1.2279285192489624,
+      "learning_rate": 1.8439473684210522e-05,
+      "loss": 1.3294,
+      "step": 165
+    },
+    {
+      "epoch": 0.04243353783231084,
+      "grad_norm": 1.1958622932434082,
+      "learning_rate": 1.791263157894737e-05,
+      "loss": 1.3772,
+      "step": 166
+    },
+    {
+      "epoch": 0.04268916155419223,
+      "grad_norm": 1.331708312034607,
+      "learning_rate": 1.738578947368421e-05,
+      "loss": 1.356,
+      "step": 167
+    },
+    {
+      "epoch": 0.04294478527607362,
+      "grad_norm": 1.0906548500061035,
+      "learning_rate": 1.685894736842105e-05,
+      "loss": 1.1135,
+      "step": 168
+    },
+    {
+      "epoch": 0.04320040899795501,
+      "grad_norm": 1.1573306322097778,
+      "learning_rate": 1.6332105263157893e-05,
+      "loss": 1.2765,
+      "step": 169
+    },
+    {
+      "epoch": 0.0434560327198364,
+      "grad_norm": 1.2292710542678833,
+      "learning_rate": 1.5805263157894735e-05,
+      "loss": 1.3473,
+      "step": 170
+    },
+    {
+      "epoch": 0.04371165644171779,
+      "grad_norm": 1.2372936010360718,
+      "learning_rate": 1.5278421052631578e-05,
+      "loss": 1.3469,
+      "step": 171
+    },
+    {
+      "epoch": 0.04396728016359918,
+      "grad_norm": 1.3126474618911743,
+      "learning_rate": 1.4751578947368419e-05,
+      "loss": 1.2876,
+      "step": 172
+    },
+    {
+      "epoch": 0.04422290388548057,
+      "grad_norm": 1.0703507661819458,
+      "learning_rate": 1.4224736842105262e-05,
+      "loss": 1.1434,
+      "step": 173
+    },
+    {
+      "epoch": 0.04447852760736196,
+      "grad_norm": 1.3084443807601929,
+      "learning_rate": 1.3697894736842106e-05,
+      "loss": 1.3667,
+      "step": 174
+    },
+    {
+      "epoch": 0.044734151329243355,
+      "grad_norm": 1.1841418743133545,
+      "learning_rate": 1.3171052631578945e-05,
+      "loss": 1.1693,
+      "step": 175
+    },
+    {
+      "epoch": 0.044989775051124746,
+      "grad_norm": 1.2170230150222778,
+      "learning_rate": 1.264421052631579e-05,
+      "loss": 1.2311,
+      "step": 176
+    },
+    {
+      "epoch": 0.04524539877300614,
+      "grad_norm": 1.4149560928344727,
+      "learning_rate": 1.211736842105263e-05,
+      "loss": 1.2887,
+      "step": 177
+    },
+    {
+      "epoch": 0.04550102249488753,
+      "grad_norm": 1.2403512001037598,
+      "learning_rate": 1.1590526315789473e-05,
+      "loss": 1.2929,
+      "step": 178
+    },
+    {
+      "epoch": 0.04575664621676891,
+      "grad_norm": 1.4973044395446777,
+      "learning_rate": 1.1063684210526316e-05,
+      "loss": 1.2258,
+      "step": 179
+    },
+    {
+      "epoch": 0.046012269938650305,
+      "grad_norm": 1.1934603452682495,
+      "learning_rate": 1.0536842105263156e-05,
+      "loss": 1.0712,
+      "step": 180
+    },
+    {
+      "epoch": 0.046267893660531696,
+      "grad_norm": 1.2042814493179321,
+      "learning_rate": 1.001e-05,
+      "loss": 1.1148,
+      "step": 181
+    },
+    {
+      "epoch": 0.04652351738241309,
+      "grad_norm": 1.599585771560669,
+      "learning_rate": 9.483157894736842e-06,
+      "loss": 1.3627,
+      "step": 182
+    },
+    {
+      "epoch": 0.04677914110429448,
+      "grad_norm": 1.2744159698486328,
+      "learning_rate": 8.956315789473684e-06,
+      "loss": 1.1113,
+      "step": 183
+    },
+    {
+      "epoch": 0.04703476482617587,
+      "grad_norm": 1.2782379388809204,
+      "learning_rate": 8.429473684210525e-06,
+      "loss": 1.1487,
+      "step": 184
+    },
+    {
+      "epoch": 0.04729038854805726,
+      "grad_norm": 1.3419467210769653,
+      "learning_rate": 7.902631578947368e-06,
+      "loss": 1.056,
+      "step": 185
+    },
+    {
+      "epoch": 0.04754601226993865,
+      "grad_norm": 1.544342279434204,
+      "learning_rate": 7.3757894736842095e-06,
+      "loss": 1.2106,
+      "step": 186
+    },
+    {
+      "epoch": 0.04780163599182004,
+      "grad_norm": 1.387300729751587,
+      "learning_rate": 6.848947368421053e-06,
+      "loss": 1.0454,
+      "step": 187
+    },
+    {
+      "epoch": 0.04805725971370143,
+      "grad_norm": 1.471253752708435,
+      "learning_rate": 6.322105263157895e-06,
+      "loss": 1.1383,
+      "step": 188
+    },
+    {
+      "epoch": 0.04831288343558282,
+      "grad_norm": 1.6812406778335571,
+      "learning_rate": 5.7952631578947365e-06,
+      "loss": 1.1018,
+      "step": 189
+    },
+    {
+      "epoch": 0.04856850715746421,
+      "grad_norm": 1.8098957538604736,
+      "learning_rate": 5.268421052631578e-06,
+      "loss": 1.1271,
+      "step": 190
+    },
+    {
+      "epoch": 0.048824130879345604,
+      "grad_norm": 1.7692208290100098,
+      "learning_rate": 4.741578947368421e-06,
+      "loss": 1.1954,
+      "step": 191
+    },
+    {
+      "epoch": 0.049079754601226995,
+      "grad_norm": 1.8959792852401733,
+      "learning_rate": 4.2147368421052626e-06,
+      "loss": 1.3042,
+      "step": 192
+    },
+    {
+      "epoch": 0.049335378323108386,
+      "grad_norm": 1.7091044187545776,
+      "learning_rate": 3.6878947368421047e-06,
+      "loss": 1.089,
+      "step": 193
+    },
+    {
+      "epoch": 0.04959100204498978,
+      "grad_norm": 1.944200038909912,
+      "learning_rate": 3.1610526315789474e-06,
+      "loss": 1.3821,
+      "step": 194
+    },
+    {
+      "epoch": 0.04984662576687116,
+      "grad_norm": 1.6086976528167725,
+      "learning_rate": 2.634210526315789e-06,
+      "loss": 0.7973,
+      "step": 195
+    },
+    {
+      "epoch": 0.050102249488752554,
+      "grad_norm": 2.2544631958007812,
+      "learning_rate": 2.1073684210526313e-06,
+      "loss": 1.0902,
+      "step": 196
+    },
+    {
+      "epoch": 0.050357873210633945,
+      "grad_norm": 2.537259101867676,
+      "learning_rate": 1.5805263157894737e-06,
+      "loss": 1.26,
+      "step": 197
+    },
+    {
+      "epoch": 0.05061349693251534,
+      "grad_norm": 2.4353854656219482,
+      "learning_rate": 1.0536842105263156e-06,
+      "loss": 0.8782,
+      "step": 198
+    },
+    {
+      "epoch": 0.05086912065439673,
+      "grad_norm": 2.2021408081054688,
+      "learning_rate": 5.268421052631578e-07,
+      "loss": 0.5986,
+      "step": 199
+    },
+    {
+      "epoch": 0.05112474437627812,
+      "grad_norm": 3.3052453994750977,
+      "learning_rate": 0.0,
+      "loss": 0.6144,
+      "step": 200
+    },
+    {
+      "epoch": 0.05112474437627812,
+      "eval_loss": 1.1849799156188965,
+      "eval_runtime": 48.3452,
+      "eval_samples_per_second": 34.088,
+      "eval_steps_per_second": 8.522,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.06657392623616e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null