Training in progress, step 48, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +115 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a31a542e2dbb2c9cbe799386642c1e0b77dc5941a765201ea34a5511bc04ab63
 size 479769104

 version https://git-lfs.github.com/spec/v1
+oid sha256:1fa6e8dd9233e51c2bb7a726c7c61272bc9817f9ced95cd515b6694d8ba56962
 size 479769104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ed5cc0419647052fd30648d8c3078b7b3cb31f218b1103a547918cea2f1358f
 size 240728084

 version https://git-lfs.github.com/spec/v1
+oid sha256:c9fde0f59df262cf71a6aabd7d3d62e074982e8aa6dfb99efe152ae0a4de67dc
 size 240728084

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:85b7a108a3eca8766b6cabea067222bf8fc5ccd85d84371fb8aa5547b4f34b95
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a58743f32ef93b4d40e69256e320c5dd50edad767f0200733beb3941b6d4ca7c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0020688415093653464,
   "eval_steps": 500,
-  "global_step": 32,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -231,6 +231,118 @@
       "learning_rate": 2.0645161290322582e-06,
       "loss": 1.5938,
       "step": 32
     }
   ],
   "logging_steps": 1,
@@ -250,7 +362,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.068879221404467e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.00310326226404802,
   "eval_steps": 500,
+  "global_step": 48,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.0645161290322582e-06,
       "loss": 1.5938,
       "step": 32
+    },
+    {
+      "epoch": 0.0021334928065330134,
+      "grad_norm": 5.998967170715332,
+      "learning_rate": 2.129032258064516e-06,
+      "loss": 1.5716,
+      "step": 33
+    },
+    {
+      "epoch": 0.002198144103700681,
+      "grad_norm": 6.407791614532471,
+      "learning_rate": 2.1935483870967745e-06,
+      "loss": 1.5848,
+      "step": 34
+    },
+    {
+      "epoch": 0.002262795400868348,
+      "grad_norm": 6.110191822052002,
+      "learning_rate": 2.2580645161290324e-06,
+      "loss": 1.643,
+      "step": 35
+    },
+    {
+      "epoch": 0.0023274466980360148,
+      "grad_norm": 6.058048248291016,
+      "learning_rate": 2.3225806451612907e-06,
+      "loss": 1.5242,
+      "step": 36
+    },
+    {
+      "epoch": 0.0023920979952036818,
+      "grad_norm": 7.348267078399658,
+      "learning_rate": 2.3870967741935486e-06,
+      "loss": 1.5112,
+      "step": 37
+    },
+    {
+      "epoch": 0.002456749292371349,
+      "grad_norm": 6.676770210266113,
+      "learning_rate": 2.4516129032258066e-06,
+      "loss": 1.5184,
+      "step": 38
+    },
+    {
+      "epoch": 0.002521400589539016,
+      "grad_norm": 9.655611038208008,
+      "learning_rate": 2.5161290322580645e-06,
+      "loss": 1.4867,
+      "step": 39
+    },
+    {
+      "epoch": 0.002586051886706683,
+      "grad_norm": 10.357527732849121,
+      "learning_rate": 2.580645161290323e-06,
+      "loss": 1.5255,
+      "step": 40
+    },
+    {
+      "epoch": 0.00265070318387435,
+      "grad_norm": 6.523240566253662,
+      "learning_rate": 2.645161290322581e-06,
+      "loss": 1.5326,
+      "step": 41
+    },
+    {
+      "epoch": 0.002715354481042017,
+      "grad_norm": 5.7459282875061035,
+      "learning_rate": 2.709677419354839e-06,
+      "loss": 1.5533,
+      "step": 42
+    },
+    {
+      "epoch": 0.0027800057782096845,
+      "grad_norm": 5.776258945465088,
+      "learning_rate": 2.774193548387097e-06,
+      "loss": 1.5261,
+      "step": 43
+    },
+    {
+      "epoch": 0.0028446570753773515,
+      "grad_norm": 7.176516056060791,
+      "learning_rate": 2.8387096774193553e-06,
+      "loss": 1.4434,
+      "step": 44
+    },
+    {
+      "epoch": 0.0029093083725450185,
+      "grad_norm": 6.083931922912598,
+      "learning_rate": 2.903225806451613e-06,
+      "loss": 1.5127,
+      "step": 45
+    },
+    {
+      "epoch": 0.0029739596697126855,
+      "grad_norm": 8.212278366088867,
+      "learning_rate": 2.967741935483871e-06,
+      "loss": 1.569,
+      "step": 46
+    },
+    {
+      "epoch": 0.003038610966880353,
+      "grad_norm": 6.138173580169678,
+      "learning_rate": 3.0322580645161295e-06,
+      "loss": 1.4985,
+      "step": 47
+    },
+    {
+      "epoch": 0.00310326226404802,
+      "grad_norm": 9.2051362991333,
+      "learning_rate": 3.0967741935483874e-06,
+      "loss": 1.4528,
+      "step": 48
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.1121051098669056e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null