Model save

Files changed (5) hide show

README.md CHANGED Viewed

@@ -27,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/polyagent/huggingface/runs/bm5sjqkj)
 This model was trained with SFT.

 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/polyagent/huggingface/runs/vkmqfqba)
 This model was trained with SFT.

all_results.json CHANGED Viewed

@@ -2,8 +2,8 @@
     "epoch": 3.0,
     "total_flos": 7.187967826039144e+18,
     "train_loss": 1.5352961912832626,
-    "train_runtime": 6906.6629,
     "train_samples": 95663,
-    "train_samples_per_second": 10.159,
-    "train_steps_per_second": 0.159
 }

     "epoch": 3.0,
     "total_flos": 7.187967826039144e+18,
     "train_loss": 1.5352961912832626,
+    "train_runtime": 6782.5225,
     "train_samples": 95663,
+    "train_samples_per_second": 10.345,
+    "train_steps_per_second": 0.162
 }

runs/Dec26_15-04-59_mia1-gpu-110/events.out.tfevents.1735225539.mia1-gpu-110.3949310.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d292a3c8ae6549f27f080bec078e2d7d53a775f85fbc9777ab9f3de1211f723c
-size 53072

 version https://git-lfs.github.com/spec/v1
+oid sha256:e5fbca0ac8860670dcea47250e8f7e7140d862e1a0bfd09b12bca1a327504fc9
+size 53426

train_results.json CHANGED Viewed

@@ -2,8 +2,8 @@
     "epoch": 3.0,
     "total_flos": 7.187967826039144e+18,
     "train_loss": 1.5352961912832626,
-    "train_runtime": 6906.6629,
     "train_samples": 95663,
-    "train_samples_per_second": 10.159,
-    "train_steps_per_second": 0.159
 }

     "epoch": 3.0,
     "total_flos": 7.187967826039144e+18,
     "train_loss": 1.5352961912832626,
+    "train_runtime": 6782.5225,
     "train_samples": 95663,
+    "train_samples_per_second": 10.345,
+    "train_steps_per_second": 0.162
 }

trainer_state.json CHANGED Viewed

@@ -437,10 +437,10 @@
     },
     {
       "epoch": 0.819672131147541,
-      "eval_loss": 1.5333800315856934,
-      "eval_runtime": 19.9646,
-      "eval_samples_per_second": 35.463,
-      "eval_steps_per_second": 1.152,
       "step": 300
     },
     {
@@ -865,10 +865,10 @@
     },
     {
       "epoch": 1.639344262295082,
-      "eval_loss": 1.5181070566177368,
-      "eval_runtime": 19.9478,
-      "eval_samples_per_second": 35.493,
-      "eval_steps_per_second": 1.153,
       "step": 600
     },
     {
@@ -1293,10 +1293,10 @@
     },
     {
       "epoch": 2.459016393442623,
-      "eval_loss": 1.5161317586898804,
-      "eval_runtime": 19.9284,
-      "eval_samples_per_second": 35.527,
-      "eval_steps_per_second": 1.154,
       "step": 900
     },
     {
@@ -1577,9 +1577,9 @@
       "step": 1098,
       "total_flos": 7.187967826039144e+18,
       "train_loss": 1.5352961912832626,
-      "train_runtime": 6906.6629,
-      "train_samples_per_second": 10.159,
-      "train_steps_per_second": 0.159
     }
   ],
   "logging_steps": 5,

     },
     {
       "epoch": 0.819672131147541,
+      "eval_loss": 1.533558964729309,
+      "eval_runtime": 19.6321,
+      "eval_samples_per_second": 36.063,
+      "eval_steps_per_second": 1.172,
       "step": 300
     },
     {
     },
     {
       "epoch": 1.639344262295082,
+      "eval_loss": 1.5182926654815674,
+      "eval_runtime": 19.6502,
+      "eval_samples_per_second": 36.03,
+      "eval_steps_per_second": 1.17,
       "step": 600
     },
     {
     },
     {
       "epoch": 2.459016393442623,
+      "eval_loss": 1.5163270235061646,
+      "eval_runtime": 19.6134,
+      "eval_samples_per_second": 36.098,
+      "eval_steps_per_second": 1.173,
       "step": 900
     },
     {
       "step": 1098,
       "total_flos": 7.187967826039144e+18,
       "train_loss": 1.5352961912832626,
+      "train_runtime": 6782.5225,
+      "train_samples_per_second": 10.345,
+      "train_steps_per_second": 0.162
     }
   ],
   "logging_steps": 5,