Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47e9e11086022bdb672109255bcb5da3ea9c9c5b728778ba9dee332b610d60ba
 size 628216

 version https://git-lfs.github.com/spec/v1
+oid sha256:a5d23dd1a5b61c4d3ed7eb8e2f7c5c86eb7f82d4c1b1647084a8d7590624a9ae
 size 628216

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a0d8dfdabfc6df4fb78d5072c2dd9120dd8b8db86fbfe2b9aabf4be73804c1d
 size 1273162

 version https://git-lfs.github.com/spec/v1
+oid sha256:e40f91771dba971377f482b2fc3e80990a715f44b69998c9d15934a26e10f737
 size 1273162

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:626c6d4cfa38cc6b99d3ede7924a255742fa64c0b1d284a9b68cafb35460e388
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e02d73ba79bec205b89c636f5685d264097acf62408785a68f0dfc1f9e44a25
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.746877670288086,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.032411408815903195,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 286.237,
       "eval_steps_per_second": 143.137,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 104685247660032.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.745776176452637,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.043215211754537596,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 286.237,
       "eval_steps_per_second": 143.137,
       "step": 150
+    },
+    {
+      "epoch": 0.032627484874675886,
+      "grad_norm": 0.2572970688343048,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 10.7659,
+      "step": 151
+    },
+    {
+      "epoch": 0.032843560933448576,
+      "grad_norm": 0.23141218721866608,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 10.7649,
+      "step": 152
+    },
+    {
+      "epoch": 0.03305963699222126,
+      "grad_norm": 0.2419702559709549,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 10.7615,
+      "step": 153
+    },
+    {
+      "epoch": 0.03327571305099395,
+      "grad_norm": 0.19218285381793976,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 10.7609,
+      "step": 154
+    },
+    {
+      "epoch": 0.03349178910976664,
+      "grad_norm": 0.2367192953824997,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 10.7647,
+      "step": 155
+    },
+    {
+      "epoch": 0.033707865168539325,
+      "grad_norm": 0.21695514023303986,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 10.765,
+      "step": 156
+    },
+    {
+      "epoch": 0.033923941227312016,
+      "grad_norm": 0.2253107875585556,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 10.7699,
+      "step": 157
+    },
+    {
+      "epoch": 0.0341400172860847,
+      "grad_norm": 0.19992785155773163,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 10.7592,
+      "step": 158
+    },
+    {
+      "epoch": 0.03435609334485739,
+      "grad_norm": 0.21194936335086823,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 10.7629,
+      "step": 159
+    },
+    {
+      "epoch": 0.03457216940363008,
+      "grad_norm": 0.26007580757141113,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 10.7648,
+      "step": 160
+    },
+    {
+      "epoch": 0.034788245462402764,
+      "grad_norm": 0.22751381993293762,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 10.7603,
+      "step": 161
+    },
+    {
+      "epoch": 0.035004321521175455,
+      "grad_norm": 0.2545979619026184,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 10.7617,
+      "step": 162
+    },
+    {
+      "epoch": 0.03522039757994814,
+      "grad_norm": 0.20070402324199677,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 10.7547,
+      "step": 163
+    },
+    {
+      "epoch": 0.03543647363872083,
+      "grad_norm": 0.20730213820934296,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 10.761,
+      "step": 164
+    },
+    {
+      "epoch": 0.03565254969749352,
+      "grad_norm": 0.2222391963005066,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 10.7707,
+      "step": 165
+    },
+    {
+      "epoch": 0.035868625756266204,
+      "grad_norm": 0.21017976105213165,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 10.7582,
+      "step": 166
+    },
+    {
+      "epoch": 0.036084701815038894,
+      "grad_norm": 0.21349570155143738,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 10.7534,
+      "step": 167
+    },
+    {
+      "epoch": 0.036300777873811585,
+      "grad_norm": 0.19311493635177612,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 10.7536,
+      "step": 168
+    },
+    {
+      "epoch": 0.03651685393258427,
+      "grad_norm": 0.18329201638698578,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 10.7595,
+      "step": 169
+    },
+    {
+      "epoch": 0.03673292999135696,
+      "grad_norm": 0.24617359042167664,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 10.7562,
+      "step": 170
+    },
+    {
+      "epoch": 0.03694900605012964,
+      "grad_norm": 0.24574460089206696,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 10.7496,
+      "step": 171
+    },
+    {
+      "epoch": 0.037165082108902334,
+      "grad_norm": 0.2094903290271759,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 10.7454,
+      "step": 172
+    },
+    {
+      "epoch": 0.037381158167675024,
+      "grad_norm": 0.23481670022010803,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 10.7503,
+      "step": 173
+    },
+    {
+      "epoch": 0.03759723422644771,
+      "grad_norm": 0.20529882609844208,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 10.7551,
+      "step": 174
+    },
+    {
+      "epoch": 0.0378133102852204,
+      "grad_norm": 0.2087268829345703,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 10.7512,
+      "step": 175
+    },
+    {
+      "epoch": 0.03802938634399308,
+      "grad_norm": 0.24301747977733612,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 10.7587,
+      "step": 176
+    },
+    {
+      "epoch": 0.03824546240276577,
+      "grad_norm": 0.24005451798439026,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 10.7615,
+      "step": 177
+    },
+    {
+      "epoch": 0.038461538461538464,
+      "grad_norm": 0.2205115705728531,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 10.7405,
+      "step": 178
+    },
+    {
+      "epoch": 0.03867761452031115,
+      "grad_norm": 0.22657622396945953,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 10.7544,
+      "step": 179
+    },
+    {
+      "epoch": 0.03889369057908384,
+      "grad_norm": 0.24251677095890045,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 10.7535,
+      "step": 180
+    },
+    {
+      "epoch": 0.03910976663785653,
+      "grad_norm": 0.22308377921581268,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 10.7536,
+      "step": 181
+    },
+    {
+      "epoch": 0.03932584269662921,
+      "grad_norm": 0.2320922166109085,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 10.7482,
+      "step": 182
+    },
+    {
+      "epoch": 0.0395419187554019,
+      "grad_norm": 0.23464159667491913,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 10.7496,
+      "step": 183
+    },
+    {
+      "epoch": 0.03975799481417459,
+      "grad_norm": 0.21937261521816254,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 10.7502,
+      "step": 184
+    },
+    {
+      "epoch": 0.03997407087294728,
+      "grad_norm": 0.23580560088157654,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 10.7532,
+      "step": 185
+    },
+    {
+      "epoch": 0.04019014693171997,
+      "grad_norm": 0.2424219399690628,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 10.745,
+      "step": 186
+    },
+    {
+      "epoch": 0.04040622299049265,
+      "grad_norm": 0.2548362612724304,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 10.7458,
+      "step": 187
+    },
+    {
+      "epoch": 0.04062229904926534,
+      "grad_norm": 0.31250232458114624,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 10.744,
+      "step": 188
+    },
+    {
+      "epoch": 0.040838375108038026,
+      "grad_norm": 0.2745141088962555,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 10.7386,
+      "step": 189
+    },
+    {
+      "epoch": 0.04105445116681072,
+      "grad_norm": 0.2804180383682251,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 10.7314,
+      "step": 190
+    },
+    {
+      "epoch": 0.04127052722558341,
+      "grad_norm": 0.2679109573364258,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 10.7439,
+      "step": 191
+    },
+    {
+      "epoch": 0.04148660328435609,
+      "grad_norm": 0.22815419733524323,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 10.7384,
+      "step": 192
+    },
+    {
+      "epoch": 0.04170267934312878,
+      "grad_norm": 0.2827376127243042,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 10.7385,
+      "step": 193
+    },
+    {
+      "epoch": 0.04191875540190147,
+      "grad_norm": 0.3146594762802124,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 10.7451,
+      "step": 194
+    },
+    {
+      "epoch": 0.042134831460674156,
+      "grad_norm": 0.30587294697761536,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 10.7321,
+      "step": 195
+    },
+    {
+      "epoch": 0.04235090751944685,
+      "grad_norm": 0.26626572012901306,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 10.7262,
+      "step": 196
+    },
+    {
+      "epoch": 0.04256698357821953,
+      "grad_norm": 0.3111208975315094,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 10.7239,
+      "step": 197
+    },
+    {
+      "epoch": 0.04278305963699222,
+      "grad_norm": 0.4671022891998291,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 10.7375,
+      "step": 198
+    },
+    {
+      "epoch": 0.04299913569576491,
+      "grad_norm": 0.520592987537384,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 10.7301,
+      "step": 199
+    },
+    {
+      "epoch": 0.043215211754537596,
+      "grad_norm": 0.5407875776290894,
+      "learning_rate": 0.0,
+      "loss": 10.6871,
+      "step": 200
+    },
+    {
+      "epoch": 0.043215211754537596,
+      "eval_loss": 10.745776176452637,
+      "eval_runtime": 27.2162,
+      "eval_samples_per_second": 286.41,
+      "eval_steps_per_second": 143.223,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 139523405119488.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null