Training in progress, step 32, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1308ab5b004737cda37c6f011533f981173cee903cd2ccf6c1b76fed4bc8922e
 size 97728

 version https://git-lfs.github.com/spec/v1
+oid sha256:446704d171909fe1b90ebb16e3107050b0a85e3467f5f3c73077d0c6783e132b
 size 97728

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f7d32e659796260eb80ee744578b61feb131752c58c91330bd88c49794e6421
 size 212298

 version https://git-lfs.github.com/spec/v1
+oid sha256:ffcf1c68b31bc14c84a25b4361564f95d31085e478b3396745e9e7887925d1fd
 size 212298

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9da8f5d493f0f752036f2ae2fae7ea8749d37cf1119ef3a2c45408ef763d8466
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:faa450476dbd198dc4484447171d2b13c39e0fa316de67fcc41765245207305d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4293e50fb3c9f4c5f80a712103de6c5deaef7eaac0675ada5b61f2e11e3e6a04
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c32f0c34d941772d95b11e82ac22de22e969c9c470a009499eff8dbdf7ec5a86
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8402625820568927,
   "eval_steps": 8,
-  "global_step": 24,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -95,6 +95,28 @@
       "eval_samples_per_second": 134.583,
       "eval_steps_per_second": 67.985,
       "step": 24
     }
   ],
   "logging_steps": 3,
@@ -114,7 +136,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 10291064340480.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.136761487964989,
   "eval_steps": 8,
+  "global_step": 32,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 134.583,
       "eval_steps_per_second": 67.985,
       "step": 24
+    },
+    {
+      "epoch": 0.9452954048140044,
+      "grad_norm": 0.15795904397964478,
+      "learning_rate": 0.00019450008187146684,
+      "loss": 10.3562,
+      "step": 27
+    },
+    {
+      "epoch": 1.0667396061269148,
+      "grad_norm": 0.22537720203399658,
+      "learning_rate": 0.00018888354486549237,
+      "loss": 13.5544,
+      "step": 30
+    },
+    {
+      "epoch": 1.136761487964989,
+      "eval_loss": 10.337327003479004,
+      "eval_runtime": 0.7182,
+      "eval_samples_per_second": 135.057,
+      "eval_steps_per_second": 68.225,
+      "step": 32
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 13752604164096.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null