besimray committed (verified)
Commit e217616 · Parent: edba1df

Training in progress, step 90, checkpoint

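This commit refreshes the step-90 checkpoint under last-checkpoint/: the Git LFS pointers for the adapter weights, optimizer state, RNG state, and LR scheduler are updated, and trainer_state.json gains the training and evaluation log entries for steps 81 through 90. For orientation only, here is a minimal, hypothetical sketch of a transformers Trainer configuration consistent with the state recorded below; the actual training script is not part of this commit, and values marked as assumed are inferred rather than confirmed.

# Hypothetical sketch, not part of this commit: a TrainingArguments setup
# consistent with the values recorded in last-checkpoint/trainer_state.json.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="miner_id_24",            # matches "best_model_checkpoint": "miner_id_24/checkpoint-90"
    per_device_train_batch_size=2,       # "train_batch_size": 2
    logging_steps=1,                     # "logging_steps": 1
    eval_strategy="steps",               # named evaluation_strategy on older transformers releases
    eval_steps=5,                        # "eval_steps": 5
    save_strategy="steps",
    save_steps=5,                        # assumed; checkpoints are visible at steps 80 and 90
    metric_for_best_model="eval_loss",   # assumed; best_metric tracks eval_loss (11.0346 at step 90)
    greater_is_better=False,             # lower eval_loss is better
)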
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:819d34c3c14db785e0dd28311c01d0c8f6dcd03cdecfe856b6d00ab306a6259e
+oid sha256:34c32e60c57250c51934f542ff2e75446d1c4cf392e777594b077e17ea6ad239
 size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf9b6d9e891724bb31db21c88f9512591eca8a4a6ad4e8699987a16aa8a16498
+oid sha256:78cd83f855bd80bf0c0a895685a76b346681eb6199b8673ce02a1764809414f1
 size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77d6b80ff98d1964921d012a443cb87b248a1f2b5da6296fdc6b0c8c5f518f22
+oid sha256:820fcfa3250b03ccb2011d1c28382f78231a7cd53b56f3e8494ea4c7f9ea8506
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7803647d52fbd7429a283dc695ba7cf653ff890c06d5c50f67d0a09610438889
+oid sha256:23b27ab0ae2b9af6f3d4c84cdaf8b0fc887acf71f8f726b270a3bce2845000a9
 size 1064
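All four files above are stored through Git LFS, so each diff touches only the three-line pointer (version, oid, size): the oid, which is the SHA-256 digest of the file's contents, changes, while the recorded sizes stay the same. A small hypothetical check, assuming the checkpoint files have been downloaded locally, that a file matches its new pointer:

# Hypothetical verification, not part of this commit; the expected oid is the
# new pointer value for adapter_model.safetensors shown above.
import hashlib

def lfs_oid(path, chunk_size=1 << 20):
    # Stream the file in 1 MiB chunks and return its SHA-256 hex digest,
    # which is exactly what the LFS pointer records after "oid sha256:".
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "34c32e60c57250c51934f542ff2e75446d1c4cf392e777594b077e17ea6ad239"
assert lfs_oid("last-checkpoint/adapter_model.safetensors") == expected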
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.038910865783691,
-  "best_model_checkpoint": "miner_id_24/checkpoint-80",
-  "epoch": 0.0036157374974576844,
+  "best_metric": 11.034589767456055,
+  "best_model_checkpoint": "miner_id_24/checkpoint-90",
+  "epoch": 0.004067704684639895,
   "eval_steps": 5,
-  "global_step": 80,
+  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -703,6 +703,92 @@
       "eval_samples_per_second": 52.822,
       "eval_steps_per_second": 26.414,
       "step": 80
+    },
+    {
+      "epoch": 0.0036609342161759055,
+      "grad_norm": 0.7481367588043213,
+      "learning_rate": 0.0001898168561213419,
+      "loss": 44.2182,
+      "step": 81
+    },
+    {
+      "epoch": 0.0037061309348941267,
+      "grad_norm": 0.628414511680603,
+      "learning_rate": 0.0001895331334332753,
+      "loss": 44.1519,
+      "step": 82
+    },
+    {
+      "epoch": 0.003751327653612348,
+      "grad_norm": 0.658549964427948,
+      "learning_rate": 0.0001892457303887706,
+      "loss": 44.1364,
+      "step": 83
+    },
+    {
+      "epoch": 0.0037965243723305686,
+      "grad_norm": 0.5245007276535034,
+      "learning_rate": 0.0001889546588018412,
+      "loss": 44.1079,
+      "step": 84
+    },
+    {
+      "epoch": 0.00384172109104879,
+      "grad_norm": 0.5555324554443359,
+      "learning_rate": 0.00018865993063730004,
+      "loss": 44.1445,
+      "step": 85
+    },
+    {
+      "epoch": 0.00384172109104879,
+      "eval_loss": 11.036417007446289,
+      "eval_runtime": 176.131,
+      "eval_samples_per_second": 52.898,
+      "eval_steps_per_second": 26.452,
+      "step": 85
+    },
+    {
+      "epoch": 0.003886917809767011,
+      "grad_norm": 0.43622660636901855,
+      "learning_rate": 0.00018836155801026753,
+      "loss": 44.1515,
+      "step": 86
+    },
+    {
+      "epoch": 0.003932114528485232,
+      "grad_norm": 0.578544020652771,
+      "learning_rate": 0.0001880595531856738,
+      "loss": 44.0766,
+      "step": 87
+    },
+    {
+      "epoch": 0.003977311247203453,
+      "grad_norm": 0.598685085773468,
+      "learning_rate": 0.00018775392857775432,
+      "loss": 44.1756,
+      "step": 88
+    },
+    {
+      "epoch": 0.004022507965921674,
+      "grad_norm": 0.5733134150505066,
+      "learning_rate": 0.00018744469674953956,
+      "loss": 44.1756,
+      "step": 89
+    },
+    {
+      "epoch": 0.004067704684639895,
+      "grad_norm": 0.5177151560783386,
+      "learning_rate": 0.00018713187041233896,
+      "loss": 44.173,
+      "step": 90
+    },
+    {
+      "epoch": 0.004067704684639895,
+      "eval_loss": 11.034589767456055,
+      "eval_runtime": 176.3402,
+      "eval_samples_per_second": 52.835,
+      "eval_steps_per_second": 26.421,
+      "step": 90
     }
   ],
   "logging_steps": 1,
@@ -731,7 +817,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 840538521600.0,
+  "total_flos": 945605836800.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null