Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +47 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab64d18f054dfcdea19d99ed5b73097e8f275079ec51c2a5aed567137d2c1dc4
 size 42002136

 version https://git-lfs.github.com/spec/v1
+oid sha256:bd3d66cb4b394c16d9f5180b30c893c4433cf7f698436fa62fd3ebe42686a21a
 size 42002136

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11a74b01c0d89bed8cf29448cc690a19b8b6deda66a64663fb9b39eedc0d66f2
 size 21822612

 version https://git-lfs.github.com/spec/v1
+oid sha256:74e1d32b6a73f8e6d5548b4ae4dcd2255ca42ca3b7df3d113fd09da31fb4bb06
 size 21822612

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aaed7d90a5f89319fda24880971a10d1089f97da9f01931c815ca0b6e7db0f70
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ca64702d15008eb5be59666b7714ec889b55ee755aef1a691dbe423dca958b1
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa716f361012bfea1a362903884260726692b93d0166a77b9b223670c0a7732f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca9a25c72339c898b564e0c464a3f6fc75bbeec408008928b7ed05533156b98c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0010948828264821275,
   "eval_steps": 50,
-  "global_step": 195,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1404,6 +1404,49 @@
       "learning_rate": 3.415506993330153e-07,
       "loss": 0.6357,
       "step": 195
     }
   ],
   "logging_steps": 1,
@@ -1418,12 +1461,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.4406500688592896e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0011229567451098742,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.415506993330153e-07,
       "loss": 0.6357,
       "step": 195
+    },
+    {
+      "epoch": 0.001100497610207677,
+      "grad_norm": 0.6193792819976807,
+      "learning_rate": 2.1863727812254653e-07,
+      "loss": 0.8182,
+      "step": 196
+    },
+    {
+      "epoch": 0.0011061123939332261,
+      "grad_norm": 0.6518222093582153,
+      "learning_rate": 1.230030851695263e-07,
+      "loss": 0.7356,
+      "step": 197
+    },
+    {
+      "epoch": 0.0011117271776587756,
+      "grad_norm": 0.6630931496620178,
+      "learning_rate": 5.467426590739511e-08,
+      "loss": 0.738,
+      "step": 198
+    },
+    {
+      "epoch": 0.001117341961384325,
+      "grad_norm": 0.5655919909477234,
+      "learning_rate": 1.3669500753099585e-08,
+      "loss": 0.6549,
+      "step": 199
+    },
+    {
+      "epoch": 0.0011229567451098742,
+      "grad_norm": 0.4997493624687195,
+      "learning_rate": 0.0,
+      "loss": 0.6157,
+      "step": 200
+    },
+    {
+      "epoch": 0.0011229567451098742,
+      "eval_loss": 0.7180939316749573,
+      "eval_runtime": 21573.751,
+      "eval_samples_per_second": 1.738,
+      "eval_steps_per_second": 1.738,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.477589814214656e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null