Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8d0b268993131ddca36693a3f3d5e57cc4a11ba23f00a58939f2269512c27b4
 size 645975704

 version https://git-lfs.github.com/spec/v1
+oid sha256:43063226301ab92a7bf44c97f619c0046f5ac2548aacbc697a9e2e7033277847
 size 645975704

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79d0bcaaff93aa36cbc945e6693b12805ae476b13a006d1475883631b3ff87ea
 size 1292176234

 version https://git-lfs.github.com/spec/v1
+oid sha256:f72175066d8cf8569e831f76930f86590d4ad41651e52524fe9c86a30aa821fc
 size 1292176234

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c5ce158bab05b4ce03a215a7430ba48e7b0a705b4a2cdb396ae523837e3a9bf
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:96b8fb31f5c18419bd885fdc8b5fd541855c66785966968c8c7aa4ff541cbb94
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.06176510825753212,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.012836695834492201,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 13.445,
       "eval_steps_per_second": 6.723,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.13283302801408e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.04504312202334404,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.0171155944459896,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.445,
       "eval_steps_per_second": 6.723,
       "step": 150
+    },
+    {
+      "epoch": 0.012922273806722149,
+      "grad_norm": 0.5880382657051086,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 0.0734,
+      "step": 151
+    },
+    {
+      "epoch": 0.013007851778952098,
+      "grad_norm": 0.7938467860221863,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 0.0846,
+      "step": 152
+    },
+    {
+      "epoch": 0.013093429751182046,
+      "grad_norm": 0.965932309627533,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 0.1002,
+      "step": 153
+    },
+    {
+      "epoch": 0.013179007723411993,
+      "grad_norm": 0.8478067517280579,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 0.0823,
+      "step": 154
+    },
+    {
+      "epoch": 0.013264585695641942,
+      "grad_norm": 0.5046500563621521,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 0.0446,
+      "step": 155
+    },
+    {
+      "epoch": 0.01335016366787189,
+      "grad_norm": 0.6454620957374573,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 0.063,
+      "step": 156
+    },
+    {
+      "epoch": 0.013435741640101837,
+      "grad_norm": 0.39416393637657166,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 0.0452,
+      "step": 157
+    },
+    {
+      "epoch": 0.013521319612331786,
+      "grad_norm": 0.4666137397289276,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 0.0652,
+      "step": 158
+    },
+    {
+      "epoch": 0.013606897584561734,
+      "grad_norm": 0.34583160281181335,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 0.0479,
+      "step": 159
+    },
+    {
+      "epoch": 0.013692475556791681,
+      "grad_norm": 0.20728279650211334,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 0.047,
+      "step": 160
+    },
+    {
+      "epoch": 0.01377805352902163,
+      "grad_norm": 0.3418344557285309,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 0.043,
+      "step": 161
+    },
+    {
+      "epoch": 0.013863631501251578,
+      "grad_norm": 0.2553056478500366,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 0.0406,
+      "step": 162
+    },
+    {
+      "epoch": 0.013949209473481525,
+      "grad_norm": 0.44725626707077026,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 0.0562,
+      "step": 163
+    },
+    {
+      "epoch": 0.014034787445711475,
+      "grad_norm": 0.4662851393222809,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 0.0405,
+      "step": 164
+    },
+    {
+      "epoch": 0.014120365417941422,
+      "grad_norm": 0.4762590825557709,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 0.0441,
+      "step": 165
+    },
+    {
+      "epoch": 0.01420594339017137,
+      "grad_norm": 0.5769884586334229,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.0511,
+      "step": 166
+    },
+    {
+      "epoch": 0.014291521362401319,
+      "grad_norm": 0.5522760152816772,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 0.0514,
+      "step": 167
+    },
+    {
+      "epoch": 0.014377099334631266,
+      "grad_norm": 0.2631785273551941,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 0.0486,
+      "step": 168
+    },
+    {
+      "epoch": 0.014462677306861214,
+      "grad_norm": 0.5963943004608154,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 0.0479,
+      "step": 169
+    },
+    {
+      "epoch": 0.014548255279091161,
+      "grad_norm": 0.23294425010681152,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 0.0506,
+      "step": 170
+    },
+    {
+      "epoch": 0.01463383325132111,
+      "grad_norm": 0.19958297908306122,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 0.0354,
+      "step": 171
+    },
+    {
+      "epoch": 0.014719411223551058,
+      "grad_norm": 0.18691357970237732,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 0.0407,
+      "step": 172
+    },
+    {
+      "epoch": 0.014804989195781005,
+      "grad_norm": 0.454658567905426,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 0.047,
+      "step": 173
+    },
+    {
+      "epoch": 0.014890567168010954,
+      "grad_norm": 0.24187132716178894,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 0.0307,
+      "step": 174
+    },
+    {
+      "epoch": 0.014976145140240902,
+      "grad_norm": 0.24971577525138855,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 0.0573,
+      "step": 175
+    },
+    {
+      "epoch": 0.01506172311247085,
+      "grad_norm": 0.13674812018871307,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 0.0381,
+      "step": 176
+    },
+    {
+      "epoch": 0.015147301084700799,
+      "grad_norm": 0.1727634221315384,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 0.038,
+      "step": 177
+    },
+    {
+      "epoch": 0.015232879056930746,
+      "grad_norm": 0.2428557425737381,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 0.0367,
+      "step": 178
+    },
+    {
+      "epoch": 0.015318457029160694,
+      "grad_norm": 0.3522767126560211,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 0.0481,
+      "step": 179
+    },
+    {
+      "epoch": 0.015404035001390643,
+      "grad_norm": 0.22990131378173828,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 0.0417,
+      "step": 180
+    },
+    {
+      "epoch": 0.01548961297362059,
+      "grad_norm": 0.16363275051116943,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 0.0293,
+      "step": 181
+    },
+    {
+      "epoch": 0.015575190945850538,
+      "grad_norm": 0.196491077542305,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 0.0421,
+      "step": 182
+    },
+    {
+      "epoch": 0.015660768918080485,
+      "grad_norm": 0.17789626121520996,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.05,
+      "step": 183
+    },
+    {
+      "epoch": 0.015746346890310434,
+      "grad_norm": 0.27496829628944397,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 0.0442,
+      "step": 184
+    },
+    {
+      "epoch": 0.015831924862540384,
+      "grad_norm": 0.3097536265850067,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 0.0587,
+      "step": 185
+    },
+    {
+      "epoch": 0.01591750283477033,
+      "grad_norm": 0.21608707308769226,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 0.0384,
+      "step": 186
+    },
+    {
+      "epoch": 0.01600308080700028,
+      "grad_norm": 0.22380782663822174,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 0.0382,
+      "step": 187
+    },
+    {
+      "epoch": 0.016088658779230228,
+      "grad_norm": 0.47718194127082825,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 0.0429,
+      "step": 188
+    },
+    {
+      "epoch": 0.016174236751460173,
+      "grad_norm": 0.24429389834403992,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 0.0383,
+      "step": 189
+    },
+    {
+      "epoch": 0.016259814723690123,
+      "grad_norm": 0.3479745388031006,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 0.0366,
+      "step": 190
+    },
+    {
+      "epoch": 0.016345392695920072,
+      "grad_norm": 0.2626732885837555,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 0.0561,
+      "step": 191
+    },
+    {
+      "epoch": 0.016430970668150018,
+      "grad_norm": 0.16285663843154907,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 0.0325,
+      "step": 192
+    },
+    {
+      "epoch": 0.016516548640379967,
+      "grad_norm": 0.39448630809783936,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 0.0523,
+      "step": 193
+    },
+    {
+      "epoch": 0.016602126612609912,
+      "grad_norm": 0.29758745431900024,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 0.0508,
+      "step": 194
+    },
+    {
+      "epoch": 0.01668770458483986,
+      "grad_norm": 0.38539424538612366,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 0.044,
+      "step": 195
+    },
+    {
+      "epoch": 0.01677328255706981,
+      "grad_norm": 0.494890034198761,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 0.0501,
+      "step": 196
+    },
+    {
+      "epoch": 0.016858860529299757,
+      "grad_norm": 0.5350846648216248,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 0.0617,
+      "step": 197
+    },
+    {
+      "epoch": 0.016944438501529706,
+      "grad_norm": 0.44980141520500183,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 0.0441,
+      "step": 198
+    },
+    {
+      "epoch": 0.017030016473759655,
+      "grad_norm": 0.6208694577217102,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 0.0438,
+      "step": 199
+    },
+    {
+      "epoch": 0.0171155944459896,
+      "grad_norm": 0.742195188999176,
+      "learning_rate": 0.0,
+      "loss": 0.0922,
+      "step": 200
+    },
+    {
+      "epoch": 0.0171155944459896,
+      "eval_loss": 0.04504312202334404,
+      "eval_runtime": 1465.0207,
+      "eval_samples_per_second": 13.434,
+      "eval_steps_per_second": 6.717,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.84377737068544e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null