Training in progress, step 128, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +115 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e4f50b92f698330efa14b9aac94764ac34d6d4509dfa5c1fdfb68575bc943c7
 size 479769104

 version https://git-lfs.github.com/spec/v1
+oid sha256:723cdbed8d40933997cc8d9e1926fd97533157645b77f78a31a0777a237955b4
 size 479769104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d6680532138f3cf57460c4e0978c71e3e3fc3aff82bf843a16cb5743ee2bad6
 size 240728084

 version https://git-lfs.github.com/spec/v1
+oid sha256:6eb9600dbce4dc797bd243e119f573cc8a6d76ef08a1e8626fb7f2cb94596db2
 size 240728084

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:804a1bb968b56ae5803ba0d79c37a917e42ea8548fe4b81baead068641d70bad
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9c8242be1e407e2d848669b7b5d69ac9796e0548ad1ac9a2c7f9531a4a28b62
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.007240945282778713,
   "eval_steps": 500,
-  "global_step": 112,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -791,6 +791,118 @@
       "learning_rate": 7.225806451612903e-06,
       "loss": 1.4194,
       "step": 112
     }
   ],
   "logging_steps": 1,
@@ -810,7 +922,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.237605007429632e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.008275366037461386,
   "eval_steps": 500,
+  "global_step": 128,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 7.225806451612903e-06,
       "loss": 1.4194,
       "step": 112
+    },
+    {
+      "epoch": 0.00730559657994638,
+      "grad_norm": 5.50141716003418,
+      "learning_rate": 7.290322580645162e-06,
+      "loss": 1.4837,
+      "step": 113
+    },
+    {
+      "epoch": 0.007370247877114047,
+      "grad_norm": 5.740527153015137,
+      "learning_rate": 7.35483870967742e-06,
+      "loss": 1.5181,
+      "step": 114
+    },
+    {
+      "epoch": 0.007434899174281714,
+      "grad_norm": 6.510746002197266,
+      "learning_rate": 7.4193548387096784e-06,
+      "loss": 1.3614,
+      "step": 115
+    },
+    {
+      "epoch": 0.007499550471449381,
+      "grad_norm": 6.607003211975098,
+      "learning_rate": 7.483870967741936e-06,
+      "loss": 1.4396,
+      "step": 116
+    },
+    {
+      "epoch": 0.007564201768617048,
+      "grad_norm": 6.828821182250977,
+      "learning_rate": 7.548387096774194e-06,
+      "loss": 1.4888,
+      "step": 117
+    },
+    {
+      "epoch": 0.007628853065784715,
+      "grad_norm": 5.51243782043457,
+      "learning_rate": 7.612903225806451e-06,
+      "loss": 1.4666,
+      "step": 118
+    },
+    {
+      "epoch": 0.007693504362952382,
+      "grad_norm": 5.797337532043457,
+      "learning_rate": 7.67741935483871e-06,
+      "loss": 1.3761,
+      "step": 119
+    },
+    {
+      "epoch": 0.007758155660120049,
+      "grad_norm": 5.460038185119629,
+      "learning_rate": 7.741935483870968e-06,
+      "loss": 1.4361,
+      "step": 120
+    },
+    {
+      "epoch": 0.007822806957287716,
+      "grad_norm": 5.366038799285889,
+      "learning_rate": 7.806451612903227e-06,
+      "loss": 1.4087,
+      "step": 121
+    },
+    {
+      "epoch": 0.007887458254455384,
+      "grad_norm": 6.333535194396973,
+      "learning_rate": 7.870967741935484e-06,
+      "loss": 1.4527,
+      "step": 122
+    },
+    {
+      "epoch": 0.00795210955162305,
+      "grad_norm": 5.908946514129639,
+      "learning_rate": 7.935483870967743e-06,
+      "loss": 1.4128,
+      "step": 123
+    },
+    {
+      "epoch": 0.008016760848790718,
+      "grad_norm": 5.050029754638672,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 1.4311,
+      "step": 124
+    },
+    {
+      "epoch": 0.008081412145958386,
+      "grad_norm": 6.2919816970825195,
+      "learning_rate": 8.064516129032258e-06,
+      "loss": 1.3892,
+      "step": 125
+    },
+    {
+      "epoch": 0.008146063443126052,
+      "grad_norm": 5.789970874786377,
+      "learning_rate": 8.129032258064517e-06,
+      "loss": 1.4024,
+      "step": 126
+    },
+    {
+      "epoch": 0.00821071474029372,
+      "grad_norm": 5.259674549102783,
+      "learning_rate": 8.193548387096774e-06,
+      "loss": 1.3323,
+      "step": 127
+    },
+    {
+      "epoch": 0.008275366037461386,
+      "grad_norm": 5.545688152313232,
+      "learning_rate": 8.258064516129033e-06,
+      "loss": 1.332,
+      "step": 128
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 8.228794259622298e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null