Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:155d0b4e14958cc4826f8f4fc007eb3dc7026eaf411977bfeb6e3aac520051aa
 size 1195453784

 version https://git-lfs.github.com/spec/v1
+oid sha256:9af05120979dc99d387e47e2801b35d07332305c38912db9e1e1437ee33c9368
 size 1195453784

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddf63402afe672f193212d4a09d550876bc185286e6714d111d317f01a3349bd
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e7025c5b45ad14c4a8c93b36d36fdd0121f94234344064e453372a01ba92272
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3612bf480764edd49c697acca715f084c88e3145aadbe322ed6ad5dba8e6163e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:23eb6a5abbeb422a57fe16bc2be5723c24ce29e4c93704fdab29340e3c115630
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b26348e70deb5f7d6cee52f5eb084c0e6829440001a3c6d0128b93cd074af8c2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b60d6f1383abda4776549360effee800fe6cfe2c0604503e9e3fbaa79347f790
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.10195372253656387,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.3395585738539898,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 13.042,
       "eval_steps_per_second": 3.261,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.157230712548229e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.09589990973472595,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.4527447651386531,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.042,
       "eval_steps_per_second": 3.261,
       "step": 150
+    },
+    {
+      "epoch": 0.3418222976796831,
+      "grad_norm": 1.643057942390442,
+      "learning_rate": 1.0872630680850196e-05,
+      "loss": 1.2261,
+      "step": 151
+    },
+    {
+      "epoch": 0.34408602150537637,
+      "grad_norm": 2.964616298675537,
+      "learning_rate": 1.0456712550462898e-05,
+      "loss": 0.5314,
+      "step": 152
+    },
+    {
+      "epoch": 0.3463497453310696,
+      "grad_norm": 0.5338827967643738,
+      "learning_rate": 1.0047504309801104e-05,
+      "loss": 0.2003,
+      "step": 153
+    },
+    {
+      "epoch": 0.3486134691567629,
+      "grad_norm": 0.39902618527412415,
+      "learning_rate": 9.645117832311886e-06,
+      "loss": 0.1775,
+      "step": 154
+    },
+    {
+      "epoch": 0.3508771929824561,
+      "grad_norm": 0.8994790315628052,
+      "learning_rate": 9.249663126440394e-06,
+      "loss": 0.1654,
+      "step": 155
+    },
+    {
+      "epoch": 0.3531409168081494,
+      "grad_norm": 0.43335387110710144,
+      "learning_rate": 8.861248305554624e-06,
+      "loss": 0.1026,
+      "step": 156
+    },
+    {
+      "epoch": 0.3554046406338427,
+      "grad_norm": 0.34745681285858154,
+      "learning_rate": 8.47997955838829e-06,
+      "loss": 0.0972,
+      "step": 157
+    },
+    {
+      "epoch": 0.3576683644595359,
+      "grad_norm": 0.45281437039375305,
+      "learning_rate": 8.10596112000994e-06,
+      "loss": 0.1073,
+      "step": 158
+    },
+    {
+      "epoch": 0.3599320882852292,
+      "grad_norm": 0.4206264615058899,
+      "learning_rate": 7.739295243326067e-06,
+      "loss": 0.0765,
+      "step": 159
+    },
+    {
+      "epoch": 0.36219581211092244,
+      "grad_norm": 0.14605945348739624,
+      "learning_rate": 7.380082171126228e-06,
+      "loss": 0.0527,
+      "step": 160
+    },
+    {
+      "epoch": 0.36445953593661573,
+      "grad_norm": 0.2502942681312561,
+      "learning_rate": 7.028420108677635e-06,
+      "loss": 0.0844,
+      "step": 161
+    },
+    {
+      "epoch": 0.366723259762309,
+      "grad_norm": 0.39932066202163696,
+      "learning_rate": 6.684405196876842e-06,
+      "loss": 0.1274,
+      "step": 162
+    },
+    {
+      "epoch": 0.36898698358800225,
+      "grad_norm": 0.8233625292778015,
+      "learning_rate": 6.3481314859657675e-06,
+      "loss": 0.1763,
+      "step": 163
+    },
+    {
+      "epoch": 0.37125070741369554,
+      "grad_norm": 1.1194967031478882,
+      "learning_rate": 6.019690909819298e-06,
+      "loss": 0.0902,
+      "step": 164
+    },
+    {
+      "epoch": 0.3735144312393888,
+      "grad_norm": 0.1579848676919937,
+      "learning_rate": 5.6991732608115e-06,
+      "loss": 0.0161,
+      "step": 165
+    },
+    {
+      "epoch": 0.37577815506508205,
+      "grad_norm": 0.21199989318847656,
+      "learning_rate": 5.386666165267256e-06,
+      "loss": 0.015,
+      "step": 166
+    },
+    {
+      "epoch": 0.37804187889077534,
+      "grad_norm": 0.0373234748840332,
+      "learning_rate": 5.08225505950613e-06,
+      "loss": 0.0044,
+      "step": 167
+    },
+    {
+      "epoch": 0.38030560271646857,
+      "grad_norm": 0.1646622270345688,
+      "learning_rate": 4.786023166484913e-06,
+      "loss": 0.037,
+      "step": 168
+    },
+    {
+      "epoch": 0.38256932654216186,
+      "grad_norm": 0.44039657711982727,
+      "learning_rate": 4.498051473045291e-06,
+      "loss": 0.0842,
+      "step": 169
+    },
+    {
+      "epoch": 0.38483305036785515,
+      "grad_norm": 0.3450931906700134,
+      "learning_rate": 4.218418707772886e-06,
+      "loss": 0.0605,
+      "step": 170
+    },
+    {
+      "epoch": 0.3870967741935484,
+      "grad_norm": 0.3942171037197113,
+      "learning_rate": 3.947201319473587e-06,
+      "loss": 0.0997,
+      "step": 171
+    },
+    {
+      "epoch": 0.38936049801924166,
+      "grad_norm": 0.3358860909938812,
+      "learning_rate": 3.684473456273278e-06,
+      "loss": 0.0902,
+      "step": 172
+    },
+    {
+      "epoch": 0.3916242218449349,
+      "grad_norm": 0.24296504259109497,
+      "learning_rate": 3.4303069453464383e-06,
+      "loss": 0.0602,
+      "step": 173
+    },
+    {
+      "epoch": 0.3938879456706282,
+      "grad_norm": 0.2584712505340576,
+      "learning_rate": 3.184771273279312e-06,
+      "loss": 0.0443,
+      "step": 174
+    },
+    {
+      "epoch": 0.39615166949632147,
+      "grad_norm": 0.36259230971336365,
+      "learning_rate": 2.947933567072987e-06,
+      "loss": 0.0835,
+      "step": 175
+    },
+    {
+      "epoch": 0.3984153933220147,
+      "grad_norm": 0.321013867855072,
+      "learning_rate": 2.719858575791534e-06,
+      "loss": 0.0427,
+      "step": 176
+    },
+    {
+      "epoch": 0.400679117147708,
+      "grad_norm": 0.3596048951148987,
+      "learning_rate": 2.500608652860256e-06,
+      "loss": 0.0741,
+      "step": 177
+    },
+    {
+      "epoch": 0.4029428409734012,
+      "grad_norm": 0.24555176496505737,
+      "learning_rate": 2.2902437390188737e-06,
+      "loss": 0.0331,
+      "step": 178
+    },
+    {
+      "epoch": 0.4052065647990945,
+      "grad_norm": 0.24574418365955353,
+      "learning_rate": 2.0888213459343587e-06,
+      "loss": 0.0585,
+      "step": 179
+    },
+    {
+      "epoch": 0.4074702886247878,
+      "grad_norm": 0.46735596656799316,
+      "learning_rate": 1.8963965404777875e-06,
+      "loss": 0.1708,
+      "step": 180
+    },
+    {
+      "epoch": 0.409734012450481,
+      "grad_norm": 0.25513896346092224,
+      "learning_rate": 1.7130219296696263e-06,
+      "loss": 0.0302,
+      "step": 181
+    },
+    {
+      "epoch": 0.4119977362761743,
+      "grad_norm": 0.11415669322013855,
+      "learning_rate": 1.5387476462974824e-06,
+      "loss": 0.011,
+      "step": 182
+    },
+    {
+      "epoch": 0.4142614601018676,
+      "grad_norm": 0.258291095495224,
+      "learning_rate": 1.3736213352103147e-06,
+      "loss": 0.0335,
+      "step": 183
+    },
+    {
+      "epoch": 0.4165251839275608,
+      "grad_norm": 0.24743030965328217,
+      "learning_rate": 1.2176881402928002e-06,
+      "loss": 0.0244,
+      "step": 184
+    },
+    {
+      "epoch": 0.4187889077532541,
+      "grad_norm": 2.3643858432769775,
+      "learning_rate": 1.0709906921234367e-06,
+      "loss": 0.0812,
+      "step": 185
+    },
+    {
+      "epoch": 0.42105263157894735,
+      "grad_norm": 1.5032339096069336,
+      "learning_rate": 9.33569096319799e-07,
+      "loss": 0.0651,
+      "step": 186
+    },
+    {
+      "epoch": 0.42331635540464063,
+      "grad_norm": 1.3170970678329468,
+      "learning_rate": 8.054609225740255e-07,
+      "loss": 0.0513,
+      "step": 187
+    },
+    {
+      "epoch": 0.4255800792303339,
+      "grad_norm": 0.8371462821960449,
+      "learning_rate": 6.867011943816724e-07,
+      "loss": 0.044,
+      "step": 188
+    },
+    {
+      "epoch": 0.42784380305602715,
+      "grad_norm": 0.29720690846443176,
+      "learning_rate": 5.77322379466617e-07,
+      "loss": 0.042,
+      "step": 189
+    },
+    {
+      "epoch": 0.43010752688172044,
+      "grad_norm": 0.2818180322647095,
+      "learning_rate": 4.773543809047186e-07,
+      "loss": 0.0376,
+      "step": 190
+    },
+    {
+      "epoch": 0.43237125070741367,
+      "grad_norm": 0.07917330414056778,
+      "learning_rate": 3.868245289486027e-07,
+      "loss": 0.0183,
+      "step": 191
+    },
+    {
+      "epoch": 0.43463497453310695,
+      "grad_norm": 0.06425964087247849,
+      "learning_rate": 3.0575757355586817e-07,
+      "loss": 0.0136,
+      "step": 192
+    },
+    {
+      "epoch": 0.43689869835880024,
+      "grad_norm": 0.08826740086078644,
+      "learning_rate": 2.3417567762266497e-07,
+      "loss": 0.0147,
+      "step": 193
+    },
+    {
+      "epoch": 0.4391624221844935,
+      "grad_norm": 0.7284664511680603,
+      "learning_rate": 1.7209841092460043e-07,
+      "loss": 0.0203,
+      "step": 194
+    },
+    {
+      "epoch": 0.44142614601018676,
+      "grad_norm": 0.0033301585353910923,
+      "learning_rate": 1.1954274476655534e-07,
+      "loss": 0.0,
+      "step": 195
+    },
+    {
+      "epoch": 0.44368986983588005,
+      "grad_norm": 0.003258542390540242,
+      "learning_rate": 7.652304734289127e-08,
+      "loss": 0.0,
+      "step": 196
+    },
+    {
+      "epoch": 0.4459535936615733,
+      "grad_norm": 0.003287576837465167,
+      "learning_rate": 4.30510798093342e-08,
+      "loss": 0.0,
+      "step": 197
+    },
+    {
+      "epoch": 0.44821731748726656,
+      "grad_norm": 0.003948764875531197,
+      "learning_rate": 1.9135993067588284e-08,
+      "loss": 0.0,
+      "step": 198
+    },
+    {
+      "epoch": 0.4504810413129598,
+      "grad_norm": 0.0040587084367871284,
+      "learning_rate": 4.784325263584854e-09,
+      "loss": 0.0,
+      "step": 199
+    },
+    {
+      "epoch": 0.4527447651386531,
+      "grad_norm": 0.004424667451530695,
+      "learning_rate": 0.0,
+      "loss": 0.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.4527447651386531,
+      "eval_loss": 0.09589990973472595,
+      "eval_runtime": 57.08,
+      "eval_samples_per_second": 13.034,
+      "eval_steps_per_second": 3.259,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.876307616730972e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null