besimray committed
Commit d4e65ac · verified · 1 Parent(s): 869aff1

Training in progress, step 200, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a639c0dd8d46132adc0b16337b5a1ff36e268cf252a3de28258698f829ef7ce6
+ oid sha256:7ce726842d250f20c86c371d9e87a8ce9846060e663f40a17aa5220f549de934
  size 125048
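The 125 kB adapter_model.safetensors updated above follows the peft naming convention for a LoRA-style adapter, so it presumably holds only the trained adapter weights rather than a full model. Under that assumption, a checked-out copy of the checkpoint directory could be attached to its base model roughly as sketched below; the base-model id and the causal-LM head are placeholders, since the commit does not identify which model is being fine-tuned.

```python
# Minimal sketch, assuming a peft (LoRA-style) adapter; "BASE_MODEL_ID" and the
# causal-LM head are placeholders, since this commit does not say which base
# model was fine-tuned.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("BASE_MODEL_ID")
# PeftModel.from_pretrained reads adapter_config.json and
# adapter_model.safetensors from the checkpoint directory.
model = PeftModel.from_pretrained(base, "last-checkpoint")
```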
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:53ecb0ea5d82d22db059b7add3506c06a0cd8eeb38fa9e49da520bca058f53e6
+ oid sha256:89d5bd83a26023b846c84f45639c04652bc00657132d2cf830dd829784d28815
  size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:14f613e9aa4b1eff57e81d3c847842d7ccd502bc7cfeef73e08e7430e2140097
+ oid sha256:8ebd8401c143f16f72b022f70333a3cfcf5c180eb2884806e8d3b16f8611859c
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fb7ebe91aa688ab052f1c015d887206a7b417ef70ab8e5d1552c4ac1b55fa0b6
+ oid sha256:53ada7ec389029bea855f553cfcb8ba6729038e26afcd278435a9c0b241e2783
  size 1064
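All four files above are stored through Git LFS, so the repository only tracks a three-line pointer per file (version, oid sha256, size); this commit swaps each pointer's oid while the sizes stay unchanged, i.e. the binary contents changed but not their length. A local copy can be checked against a pointer with a short script like the sketch below; the paths are illustrative assumptions.

```python
# Minimal sketch: check a local file against its Git LFS pointer
# (version / oid sha256 / size). The pointer text can typically be read with
# `git cat-file -p HEAD:last-checkpoint/scheduler.pt`; paths are assumptions.
import hashlib
from pathlib import Path


def parse_pointer(text: str) -> dict:
    """Split the 'key value' lines of an LFS pointer into a dict."""
    return dict(line.split(" ", 1) for line in text.strip().splitlines())


def matches_pointer(pointer_text: str, data_path: Path) -> bool:
    pointer = parse_pointer(pointer_text)
    blob = data_path.read_bytes()
    return (
        hashlib.sha256(blob).hexdigest() == pointer["oid"].removeprefix("sha256:")
        and len(blob) == int(pointer["size"])
    )
```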
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
-   "best_metric": 11.02279281616211,
-   "best_model_checkpoint": "miner_id_24/checkpoint-190",
-   "epoch": 0.008587376556462,
+   "best_metric": 11.022467613220215,
+   "best_model_checkpoint": "miner_id_24/checkpoint-200",
+   "epoch": 0.009039343743644212,
    "eval_steps": 5,
-   "global_step": 190,
+   "global_step": 200,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -1649,6 +1649,92 @@
        "eval_samples_per_second": 52.881,
        "eval_steps_per_second": 26.443,
        "step": 190
+     },
+     {
+       "epoch": 0.008632573275180222,
+       "grad_norm": 0.5897504091262817,
+       "learning_rate": 0.0001398912386251042,
+       "loss": 44.0428,
+       "step": 191
+     },
+     {
+       "epoch": 0.008677769993898443,
+       "grad_norm": 0.4917847514152527,
+       "learning_rate": 0.00013930250316539238,
+       "loss": 44.0819,
+       "step": 192
+     },
+     {
+       "epoch": 0.008722966712616664,
+       "grad_norm": 0.4644782245159149,
+       "learning_rate": 0.00013871215213371284,
+       "loss": 44.0209,
+       "step": 193
+     },
+     {
+       "epoch": 0.008768163431334885,
+       "grad_norm": 0.6393492817878723,
+       "learning_rate": 0.00013812020979708418,
+       "loss": 44.101,
+       "step": 194
+     },
+     {
+       "epoch": 0.008813360150053106,
+       "grad_norm": 0.60307377576828,
+       "learning_rate": 0.00013752670048793744,
+       "loss": 44.1646,
+       "step": 195
+     },
+     {
+       "epoch": 0.008813360150053106,
+       "eval_loss": 11.022566795349121,
+       "eval_runtime": 176.0184,
+       "eval_samples_per_second": 52.932,
+       "eval_steps_per_second": 26.469,
+       "step": 195
+     },
+     {
+       "epoch": 0.008858556868771328,
+       "grad_norm": 0.4305557608604431,
+       "learning_rate": 0.00013693164860311565,
+       "loss": 44.0883,
+       "step": 196
+     },
+     {
+       "epoch": 0.008903753587489549,
+       "grad_norm": 0.4658234119415283,
+       "learning_rate": 0.00013633507860287116,
+       "loss": 44.1006,
+       "step": 197
+     },
+     {
+       "epoch": 0.00894895030620777,
+       "grad_norm": 0.5248441100120544,
+       "learning_rate": 0.0001357370150098601,
+       "loss": 44.0716,
+       "step": 198
+     },
+     {
+       "epoch": 0.008994147024925991,
+       "grad_norm": 0.5177784562110901,
+       "learning_rate": 0.0001351374824081343,
+       "loss": 44.1013,
+       "step": 199
+     },
+     {
+       "epoch": 0.009039343743644212,
+       "grad_norm": 0.5134817361831665,
+       "learning_rate": 0.00013453650544213076,
+       "loss": 44.0501,
+       "step": 200
+     },
+     {
+       "epoch": 0.009039343743644212,
+       "eval_loss": 11.022467613220215,
+       "eval_runtime": 176.1703,
+       "eval_samples_per_second": 52.886,
+       "eval_steps_per_second": 26.446,
+       "step": 200
      }
    ],
    "logging_steps": 1,
@@ -1677,7 +1763,7 @@
        "attributes": {}
      }
    },
-   "total_flos": 1996278988800.0,
+   "total_flos": 2101346304000.0,
    "train_batch_size": 2,
    "trial_name": null,
    "trial_params": null