Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff9f368c8e6d238f92336375ac6a4b15f30c66eb02140aa5b7ecff1fcf3c6cf0
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:be971c0be3791467ff11cc2d1f1cf131962407b40a5c8bd2671193404d0d7bc3
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9bd82f7e3b6ac2fab855f046ca5562f791a22fcf56c9cdc2a0bde26e3241e1e
 size 243802484

 version https://git-lfs.github.com/spec/v1
+oid sha256:4db5461c126187dc1a84b2e5cbf8beb178694686765ce8e59656483f57a65cf3
 size 243802484

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1cbb077261230c1bee6f018ce5a88c71ab9a5b93bf95ba4cd5109b785b984c5e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c4352f8de2f91e5bd5d9396ef224a06b538d6c3943e23d7ff9a33d17c69f642
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.5062534213066101,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.06426735218508997,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 18.299,
       "eval_steps_per_second": 4.576,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.660191311763866e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.5052040219306946,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.0856898029134533,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 18.299,
       "eval_steps_per_second": 4.576,
       "step": 150
+    },
+    {
+      "epoch": 0.06469580119965725,
+      "grad_norm": 0.12356305122375488,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 0.4292,
+      "step": 151
+    },
+    {
+      "epoch": 0.06512425021422451,
+      "grad_norm": 0.17837627232074738,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 0.5327,
+      "step": 152
+    },
+    {
+      "epoch": 0.06555269922879177,
+      "grad_norm": 0.1691794991493225,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 0.5586,
+      "step": 153
+    },
+    {
+      "epoch": 0.06598114824335904,
+      "grad_norm": 0.1637227088212967,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 0.4833,
+      "step": 154
+    },
+    {
+      "epoch": 0.0664095972579263,
+      "grad_norm": 0.17156657576560974,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 0.4976,
+      "step": 155
+    },
+    {
+      "epoch": 0.06683804627249357,
+      "grad_norm": 0.17756806313991547,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 0.4562,
+      "step": 156
+    },
+    {
+      "epoch": 0.06726649528706084,
+      "grad_norm": 0.189994215965271,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 0.49,
+      "step": 157
+    },
+    {
+      "epoch": 0.0676949443016281,
+      "grad_norm": 0.19219642877578735,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 0.4925,
+      "step": 158
+    },
+    {
+      "epoch": 0.06812339331619537,
+      "grad_norm": 0.18906882405281067,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 0.5225,
+      "step": 159
+    },
+    {
+      "epoch": 0.06855184233076264,
+      "grad_norm": 0.17764447629451752,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 0.4783,
+      "step": 160
+    },
+    {
+      "epoch": 0.0689802913453299,
+      "grad_norm": 0.19509181380271912,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 0.5781,
+      "step": 161
+    },
+    {
+      "epoch": 0.06940874035989718,
+      "grad_norm": 0.1957414448261261,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.5117,
+      "step": 162
+    },
+    {
+      "epoch": 0.06983718937446444,
+      "grad_norm": 0.18858219683170319,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 0.4528,
+      "step": 163
+    },
+    {
+      "epoch": 0.0702656383890317,
+      "grad_norm": 0.19822679460048676,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 0.4929,
+      "step": 164
+    },
+    {
+      "epoch": 0.07069408740359898,
+      "grad_norm": 0.18546193838119507,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 0.5002,
+      "step": 165
+    },
+    {
+      "epoch": 0.07112253641816624,
+      "grad_norm": 0.20249854028224945,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 0.5415,
+      "step": 166
+    },
+    {
+      "epoch": 0.0715509854327335,
+      "grad_norm": 0.19854749739170074,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 0.4906,
+      "step": 167
+    },
+    {
+      "epoch": 0.07197943444730077,
+      "grad_norm": 0.20104508101940155,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 0.5106,
+      "step": 168
+    },
+    {
+      "epoch": 0.07240788346186804,
+      "grad_norm": 0.19187435507774353,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 0.4818,
+      "step": 169
+    },
+    {
+      "epoch": 0.0728363324764353,
+      "grad_norm": 0.20160090923309326,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 0.4764,
+      "step": 170
+    },
+    {
+      "epoch": 0.07326478149100257,
+      "grad_norm": 0.2078496366739273,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 0.5491,
+      "step": 171
+    },
+    {
+      "epoch": 0.07369323050556983,
+      "grad_norm": 0.20284205675125122,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 0.55,
+      "step": 172
+    },
+    {
+      "epoch": 0.07412167952013711,
+      "grad_norm": 0.23124438524246216,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 0.5559,
+      "step": 173
+    },
+    {
+      "epoch": 0.07455012853470437,
+      "grad_norm": 0.19652493298053741,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 0.5053,
+      "step": 174
+    },
+    {
+      "epoch": 0.07497857754927163,
+      "grad_norm": 0.19624044001102448,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 0.455,
+      "step": 175
+    },
+    {
+      "epoch": 0.07540702656383891,
+      "grad_norm": 0.21186797320842743,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 0.533,
+      "step": 176
+    },
+    {
+      "epoch": 0.07583547557840617,
+      "grad_norm": 0.21794423460960388,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 0.4801,
+      "step": 177
+    },
+    {
+      "epoch": 0.07626392459297343,
+      "grad_norm": 0.2287568598985672,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 0.5806,
+      "step": 178
+    },
+    {
+      "epoch": 0.0766923736075407,
+      "grad_norm": 0.2035348117351532,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 0.4855,
+      "step": 179
+    },
+    {
+      "epoch": 0.07712082262210797,
+      "grad_norm": 0.21081854403018951,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 0.4787,
+      "step": 180
+    },
+    {
+      "epoch": 0.07754927163667523,
+      "grad_norm": 0.21942371129989624,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.5068,
+      "step": 181
+    },
+    {
+      "epoch": 0.0779777206512425,
+      "grad_norm": 0.21645888686180115,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 0.5106,
+      "step": 182
+    },
+    {
+      "epoch": 0.07840616966580977,
+      "grad_norm": 0.23054279386997223,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 0.5188,
+      "step": 183
+    },
+    {
+      "epoch": 0.07883461868037704,
+      "grad_norm": 0.2399861216545105,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 0.5023,
+      "step": 184
+    },
+    {
+      "epoch": 0.0792630676949443,
+      "grad_norm": 0.23975235223770142,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 0.564,
+      "step": 185
+    },
+    {
+      "epoch": 0.07969151670951156,
+      "grad_norm": 0.21397042274475098,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 0.4383,
+      "step": 186
+    },
+    {
+      "epoch": 0.08011996572407884,
+      "grad_norm": 0.22963669896125793,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 0.4878,
+      "step": 187
+    },
+    {
+      "epoch": 0.0805484147386461,
+      "grad_norm": 0.22737430036067963,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 0.4714,
+      "step": 188
+    },
+    {
+      "epoch": 0.08097686375321336,
+      "grad_norm": 0.21495851874351501,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 0.502,
+      "step": 189
+    },
+    {
+      "epoch": 0.08140531276778064,
+      "grad_norm": 0.22086374461650848,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 0.4809,
+      "step": 190
+    },
+    {
+      "epoch": 0.0818337617823479,
+      "grad_norm": 0.21731896698474884,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 0.4487,
+      "step": 191
+    },
+    {
+      "epoch": 0.08226221079691516,
+      "grad_norm": 0.21713416278362274,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 0.5047,
+      "step": 192
+    },
+    {
+      "epoch": 0.08269065981148244,
+      "grad_norm": 0.2234414517879486,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 0.4845,
+      "step": 193
+    },
+    {
+      "epoch": 0.0831191088260497,
+      "grad_norm": 0.23213472962379456,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 0.4695,
+      "step": 194
+    },
+    {
+      "epoch": 0.08354755784061697,
+      "grad_norm": 0.23573216795921326,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 0.4251,
+      "step": 195
+    },
+    {
+      "epoch": 0.08397600685518423,
+      "grad_norm": 0.229087695479393,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 0.4232,
+      "step": 196
+    },
+    {
+      "epoch": 0.0844044558697515,
+      "grad_norm": 0.25884193181991577,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 0.4492,
+      "step": 197
+    },
+    {
+      "epoch": 0.08483290488431877,
+      "grad_norm": 0.2584173083305359,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 0.4588,
+      "step": 198
+    },
+    {
+      "epoch": 0.08526135389888603,
+      "grad_norm": 0.2601085305213928,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 0.4348,
+      "step": 199
+    },
+    {
+      "epoch": 0.0856898029134533,
+      "grad_norm": 0.32536494731903076,
+      "learning_rate": 0.0,
+      "loss": 0.4901,
+      "step": 200
+    },
+    {
+      "epoch": 0.0856898029134533,
+      "eval_loss": 0.5052040219306946,
+      "eval_runtime": 214.9884,
+      "eval_samples_per_second": 18.285,
+      "eval_steps_per_second": 4.572,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.2903966892149965e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null