Training in progress, step 45, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4fd80c0ca43223112510abd5648a46828063e848224341614900fb2b903f03fb
 size 500770656

 version https://git-lfs.github.com/spec/v1
+oid sha256:c54141e12ca416558c55f6f839ee300401215d3b2d857808e6cd3e025419836d
 size 500770656

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fdd8f58ff1b028267394992bbdfcf5ddcb4f7a3b2791b6b112b08bb65ffc2450
 size 1001863522

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d93434e8a731cac60caa8516ba040aa3b37b0e6c3845a7b7f3df87d3941857c
 size 1001863522

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3c459c5168d5e87139750e50664d3395ec01ca6e7283c2abaa97aef05422d45
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0005955a8b30005a7c1e394169fcb6835f59e33fbdd62aa53296d2507b299dc
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1fbe8f3807fcf2c9b342444bf912c2b4a5837aba74951c80e3f443bde6ab488
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e917c3eea1680bab35c1f582cbc388a7464296952762325512c323f5f76cc5cb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.048692515779982,
   "eval_steps": 9,
-  "global_step": 36,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -131,6 +131,35 @@
       "eval_samples_per_second": 1.845,
       "eval_steps_per_second": 0.93,
       "step": 36
     }
   ],
   "logging_steps": 3,
@@ -150,7 +179,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.6740693753856e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.308385933273219,
   "eval_steps": 9,
+  "global_step": 45,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1.845,
       "eval_steps_per_second": 0.93,
       "step": 36
+    },
+    {
+      "epoch": 1.1352569882777277,
+      "grad_norm": 7.26214075088501,
+      "learning_rate": 0.00017343225094356855,
+      "loss": 42.9364,
+      "step": 39
+    },
+    {
+      "epoch": 1.2218214607754734,
+      "grad_norm": 8.307271003723145,
+      "learning_rate": 0.00016494480483301836,
+      "loss": 43.8462,
+      "step": 42
+    },
+    {
+      "epoch": 1.308385933273219,
+      "grad_norm": 8.332019805908203,
+      "learning_rate": 0.00015555702330196023,
+      "loss": 43.2469,
+      "step": 45
+    },
+    {
+      "epoch": 1.308385933273219,
+      "eval_loss": 1.39437735080719,
+      "eval_runtime": 63.2944,
+      "eval_samples_per_second": 1.849,
+      "eval_steps_per_second": 0.932,
+      "step": 45
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 4.592586719232e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null