Training in progress, step 263, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +53 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c6e5c39604ce8318809f0f58050866a16208e2d2602dead51d983b132e683fb
 size 93608

 version https://git-lfs.github.com/spec/v1
+oid sha256:a5b1af8c1d1044559e4566bbe8a9da01e3a7cd6a8b87d6052aba8332beb6fc41
 size 93608

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:64c40d1bc73f541b515340dac6469684a54f37cc18606adddfab612440adebee
 size 197158

 version https://git-lfs.github.com/spec/v1
+oid sha256:550fcf36ce2e98b0de4e1aeef16362ea975811edfde74052fd22cc5f18443aff
 size 197158

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae275df9131dadab3e84666c81acf7aef0f1e0d393e070db94afde55cc7a04cb
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:2bdcfd957e9096fb21e3e831d470a6e7f9eb410847ee84b1c39e0af81ca448b3
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b7599f9359c4d25abeca594df3574d460ca4601b65af6b90fc9847ccb839f2fa
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2d933dbe0de678b7fe46f2875c92f23e4a28843792efe278b61113788ec0702
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae756a5b4dca2f7ad8c44c4862c84851fc3ad00fdb24067fecc92d6f2b533dac
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:5af588657011060d064f635dca4d0d830bcd43082eaae2d0fed1a5df4b6b5ddc
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:753fe59205deb0859689da31f17244e755cd7f229128f79e49d0491eec8ee8fa
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e1c71efe8f09776760711a07fe747388a51628ea866272ad0a46951f43e370b
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d11cedd0890a662e2118d0364333043dfd399c79d0a1ea8d4a89bb7cbc6705ac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a6c4c21ea8440b1f22c3f50a73abd0d48167dee8529fcaea1c9b7867aa3874fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.763532763532764,
   "eval_steps": 22,
-  "global_step": 242,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -663,6 +663,55 @@
       "eval_samples_per_second": 407.478,
       "eval_steps_per_second": 13.122,
       "step": 242
     }
   ],
   "logging_steps": 3,
@@ -677,12 +726,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5281217249280.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.005698005698006,
   "eval_steps": 22,
+  "global_step": 263,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 407.478,
       "eval_steps_per_second": 13.122,
       "step": 242
+    },
+    {
+      "epoch": 2.774928774928775,
+      "grad_norm": 0.034080274403095245,
+      "learning_rate": 1.5340001884502574e-06,
+      "loss": 11.9085,
+      "step": 243
+    },
+    {
+      "epoch": 2.809116809116809,
+      "grad_norm": 0.02778993546962738,
+      "learning_rate": 1.1098984190808404e-06,
+      "loss": 11.909,
+      "step": 246
+    },
+    {
+      "epoch": 2.8433048433048436,
+      "grad_norm": 0.039922092109918594,
+      "learning_rate": 7.536344767570536e-07,
+      "loss": 11.9091,
+      "step": 249
+    },
+    {
+      "epoch": 2.8774928774928776,
+      "grad_norm": 0.0371006578207016,
+      "learning_rate": 4.6570269818346224e-07,
+      "loss": 11.9102,
+      "step": 252
+    },
+    {
+      "epoch": 2.9116809116809117,
+      "grad_norm": 0.03678058087825775,
+      "learning_rate": 2.46502605258464e-07,
+      "loss": 11.9108,
+      "step": 255
+    },
+    {
+      "epoch": 2.9458689458689458,
+      "grad_norm": 0.02875494584441185,
+      "learning_rate": 9.633835071463094e-08,
+      "loss": 11.9097,
+      "step": 258
+    },
+    {
+      "epoch": 2.98005698005698,
+      "grad_norm": 0.025189543142914772,
+      "learning_rate": 1.5418296089358963e-08,
+      "loss": 11.9096,
+      "step": 261
     }
   ],
   "logging_steps": 3,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5733169889280.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null