Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5b170aa73abe60d76162a32a71f7aa806e6d2d5a2c1c227176e77865df4f5dc
 size 78207176

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4ea6c19c3ed822e35a7f08069eb33f008b6e8814aa691c9b40fcd120b51b2cd
 size 78207176

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1289f77a37736925efa5ad5b4685cf01aefadc2a6ec129817831e4191706e58
 size 156656510

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a60210bcab7dd9abac9aa3d84bd2c14f854535053b0351e9cae9fde9dac3f43
 size 156656510

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b32c9cbb0b3679a30a8148c88e14b2b27ba1c5a25709be059b7a7242a73ded3e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:975f9705aa94b638ede9106ed3c06008531e4a181ba8af050ae7d4c527e99a9a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.34555259346961975,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.10785547366528851,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 39.818,
       "eval_steps_per_second": 19.909,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4684773605769216.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.3401404023170471,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.14380729822038468,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 39.818,
       "eval_steps_per_second": 19.909,
       "step": 150
+    },
+    {
+      "epoch": 0.10857451015639044,
+      "grad_norm": 0.2277640551328659,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 0.2502,
+      "step": 151
+    },
+    {
+      "epoch": 0.10929354664749236,
+      "grad_norm": 0.2793837785720825,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 0.2861,
+      "step": 152
+    },
+    {
+      "epoch": 0.11001258313859429,
+      "grad_norm": 0.44137507677078247,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 0.405,
+      "step": 153
+    },
+    {
+      "epoch": 0.1107316196296962,
+      "grad_norm": 0.2606457769870758,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 0.2649,
+      "step": 154
+    },
+    {
+      "epoch": 0.11145065612079813,
+      "grad_norm": 0.33907219767570496,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 0.403,
+      "step": 155
+    },
+    {
+      "epoch": 0.11216969261190006,
+      "grad_norm": 0.28054168820381165,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 0.3893,
+      "step": 156
+    },
+    {
+      "epoch": 0.11288872910300198,
+      "grad_norm": 0.31116437911987305,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 0.4007,
+      "step": 157
+    },
+    {
+      "epoch": 0.1136077655941039,
+      "grad_norm": 0.28493228554725647,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 0.3631,
+      "step": 158
+    },
+    {
+      "epoch": 0.11432680208520582,
+      "grad_norm": 0.3235074281692505,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 0.3939,
+      "step": 159
+    },
+    {
+      "epoch": 0.11504583857630775,
+      "grad_norm": 0.30169492959976196,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 0.329,
+      "step": 160
+    },
+    {
+      "epoch": 0.11576487506740966,
+      "grad_norm": 0.9609299302101135,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 0.5237,
+      "step": 161
+    },
+    {
+      "epoch": 0.1164839115585116,
+      "grad_norm": 0.24112538993358612,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 0.3021,
+      "step": 162
+    },
+    {
+      "epoch": 0.11720294804961352,
+      "grad_norm": 0.24137409031391144,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 0.2978,
+      "step": 163
+    },
+    {
+      "epoch": 0.11792198454071544,
+      "grad_norm": 0.28550300002098083,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 0.3217,
+      "step": 164
+    },
+    {
+      "epoch": 0.11864102103181737,
+      "grad_norm": 0.29699841141700745,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 0.2758,
+      "step": 165
+    },
+    {
+      "epoch": 0.11936005752291928,
+      "grad_norm": 0.27138757705688477,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.2664,
+      "step": 166
+    },
+    {
+      "epoch": 0.12007909401402121,
+      "grad_norm": 0.2857905626296997,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 0.3008,
+      "step": 167
+    },
+    {
+      "epoch": 0.12079813050512314,
+      "grad_norm": 0.21144956350326538,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 0.2829,
+      "step": 168
+    },
+    {
+      "epoch": 0.12151716699622506,
+      "grad_norm": 0.22459468245506287,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 0.2816,
+      "step": 169
+    },
+    {
+      "epoch": 0.12223620348732699,
+      "grad_norm": 0.19300833344459534,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 0.3023,
+      "step": 170
+    },
+    {
+      "epoch": 0.1229552399784289,
+      "grad_norm": 0.2020920366048813,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 0.319,
+      "step": 171
+    },
+    {
+      "epoch": 0.12367427646953083,
+      "grad_norm": 0.20110531151294708,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 0.32,
+      "step": 172
+    },
+    {
+      "epoch": 0.12439331296063275,
+      "grad_norm": 0.17036013305187225,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 0.3051,
+      "step": 173
+    },
+    {
+      "epoch": 0.1251123494517347,
+      "grad_norm": 0.17303591966629028,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 0.3138,
+      "step": 174
+    },
+    {
+      "epoch": 0.1258313859428366,
+      "grad_norm": 0.19305406510829926,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 0.3315,
+      "step": 175
+    },
+    {
+      "epoch": 0.12655042243393852,
+      "grad_norm": 0.19402852654457092,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 0.3325,
+      "step": 176
+    },
+    {
+      "epoch": 0.12726945892504044,
+      "grad_norm": 0.19571861624717712,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 0.3536,
+      "step": 177
+    },
+    {
+      "epoch": 0.12798849541614238,
+      "grad_norm": 0.17924807965755463,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 0.3514,
+      "step": 178
+    },
+    {
+      "epoch": 0.1287075319072443,
+      "grad_norm": 0.1848725527524948,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 0.3485,
+      "step": 179
+    },
+    {
+      "epoch": 0.1294265683983462,
+      "grad_norm": 0.1518690586090088,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 0.3308,
+      "step": 180
+    },
+    {
+      "epoch": 0.13014560488944815,
+      "grad_norm": 0.19320723414421082,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 0.3754,
+      "step": 181
+    },
+    {
+      "epoch": 0.13086464138055007,
+      "grad_norm": 0.15887463092803955,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 0.3554,
+      "step": 182
+    },
+    {
+      "epoch": 0.13158367787165198,
+      "grad_norm": 0.19461137056350708,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.3837,
+      "step": 183
+    },
+    {
+      "epoch": 0.1323027143627539,
+      "grad_norm": 0.20374387502670288,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 0.3782,
+      "step": 184
+    },
+    {
+      "epoch": 0.13302175085385584,
+      "grad_norm": 0.17451471090316772,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 0.3439,
+      "step": 185
+    },
+    {
+      "epoch": 0.13374078734495776,
+      "grad_norm": 0.18102188408374786,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 0.3599,
+      "step": 186
+    },
+    {
+      "epoch": 0.13445982383605967,
+      "grad_norm": 0.19148917496204376,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 0.3331,
+      "step": 187
+    },
+    {
+      "epoch": 0.13517886032716162,
+      "grad_norm": 0.21265220642089844,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 0.3481,
+      "step": 188
+    },
+    {
+      "epoch": 0.13589789681826353,
+      "grad_norm": 0.21602880954742432,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 0.3241,
+      "step": 189
+    },
+    {
+      "epoch": 0.13661693330936545,
+      "grad_norm": 0.2161773294210434,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 0.3491,
+      "step": 190
+    },
+    {
+      "epoch": 0.13733596980046736,
+      "grad_norm": 0.2377278059720993,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 0.3494,
+      "step": 191
+    },
+    {
+      "epoch": 0.1380550062915693,
+      "grad_norm": 0.20273280143737793,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 0.3559,
+      "step": 192
+    },
+    {
+      "epoch": 0.13877404278267122,
+      "grad_norm": 0.26119890809059143,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 0.3343,
+      "step": 193
+    },
+    {
+      "epoch": 0.13949307927377314,
+      "grad_norm": 0.24087214469909668,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 0.3304,
+      "step": 194
+    },
+    {
+      "epoch": 0.14021211576487508,
+      "grad_norm": 0.23309563100337982,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 0.3333,
+      "step": 195
+    },
+    {
+      "epoch": 0.140931152255977,
+      "grad_norm": 0.20350925624370575,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 0.2935,
+      "step": 196
+    },
+    {
+      "epoch": 0.1416501887470789,
+      "grad_norm": 0.22840029001235962,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 0.299,
+      "step": 197
+    },
+    {
+      "epoch": 0.14236922523818082,
+      "grad_norm": 0.24860765039920807,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 0.2984,
+      "step": 198
+    },
+    {
+      "epoch": 0.14308826172928277,
+      "grad_norm": 0.28271716833114624,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 0.2937,
+      "step": 199
+    },
+    {
+      "epoch": 0.14380729822038468,
+      "grad_norm": 0.37126749753952026,
+      "learning_rate": 0.0,
+      "loss": 0.2713,
+      "step": 200
+    },
+    {
+      "epoch": 0.14380729822038468,
+      "eval_loss": 0.3401404023170471,
+      "eval_runtime": 58.8186,
+      "eval_samples_per_second": 39.817,
+      "eval_steps_per_second": 19.909,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6229883634057216.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null