Training in progress, step 70, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58b2166da01d55a046bf02b2b43ba0d22031515bba03e5d013b617cd11c38a96
 size 101752088

 version https://git-lfs.github.com/spec/v1
+oid sha256:85905a4efa045b0030db51c0398a6f359f039aba18cec9509006d5d5b8af8d05
 size 101752088

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2a6dead6d577cc2fdab009dff4a17fbd4eeb91af9b65d3440cf7f72177a3b37
 size 52046596

 version https://git-lfs.github.com/spec/v1
+oid sha256:1991c9eb89d48a8ffc4f37213c19848b47dcef3c2b8314121e579e8434fb0c91
 size 52046596

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b98ac7b1e9243bd4e083ddd1c46b98c631cbc03a3912122b4b6336d976fc04e1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:65f78a9649ee8cc6d0276caf10c7c8bafae430ddf310a187a7b4c38627fd2b56
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bde2757391e94ff5103cea79868bee6f1ccc90f20c64e82cf9933fa7b5accd0c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f98a8feef34550913a6c17e6d111551876ee5198dbf8b76141d29b9c822b726
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.007552394738498332,
   "eval_steps": 25,
-  "global_step": 60,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -451,6 +451,76 @@
       "learning_rate": 8.263518223330697e-05,
       "loss": 0.0118,
       "step": 60
     }
   ],
   "logging_steps": 1,
@@ -470,7 +540,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.95123040976896e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.008811127194914722,
   "eval_steps": 25,
+  "global_step": 70,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.263518223330697e-05,
       "loss": 0.0118,
       "step": 60
+    },
+    {
+      "epoch": 0.007678267984139971,
+      "grad_norm": 0.029303928837180138,
+      "learning_rate": 7.920883091822408e-05,
+      "loss": 0.0006,
+      "step": 61
+    },
+    {
+      "epoch": 0.00780414122978161,
+      "grad_norm": 0.16770148277282715,
+      "learning_rate": 7.580781044003324e-05,
+      "loss": 0.0068,
+      "step": 62
+    },
+    {
+      "epoch": 0.007930014475423248,
+      "grad_norm": 0.1437792032957077,
+      "learning_rate": 7.243626441830009e-05,
+      "loss": 0.0024,
+      "step": 63
+    },
+    {
+      "epoch": 0.008055887721064888,
+      "grad_norm": 0.11593683063983917,
+      "learning_rate": 6.909830056250527e-05,
+      "loss": 0.0146,
+      "step": 64
+    },
+    {
+      "epoch": 0.008181760966706527,
+      "grad_norm": 1.9799082279205322,
+      "learning_rate": 6.579798566743314e-05,
+      "loss": 0.0358,
+      "step": 65
+    },
+    {
+      "epoch": 0.008307634212348165,
+      "grad_norm": 0.15605556964874268,
+      "learning_rate": 6.25393406584088e-05,
+      "loss": 0.0031,
+      "step": 66
+    },
+    {
+      "epoch": 0.008433507457989804,
+      "grad_norm": 0.03782504051923752,
+      "learning_rate": 5.9326335692419995e-05,
+      "loss": 0.0008,
+      "step": 67
+    },
+    {
+      "epoch": 0.008559380703631443,
+      "grad_norm": 0.013293488882482052,
+      "learning_rate": 5.616288532109225e-05,
+      "loss": 0.0006,
+      "step": 68
+    },
+    {
+      "epoch": 0.008685253949273082,
+      "grad_norm": 2.1244702339172363,
+      "learning_rate": 5.305284372141095e-05,
+      "loss": 0.3516,
+      "step": 69
+    },
+    {
+      "epoch": 0.008811127194914722,
+      "grad_norm": 0.1244107261300087,
+      "learning_rate": 5.000000000000002e-05,
+      "loss": 0.0042,
+      "step": 70
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.60976881139712e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null