Training in progress, epoch 2

Browse files

Files changed (8) hide show

model.safetensors +1 -1
run-0/checkpoint-1126/model.safetensors +1 -1
run-0/checkpoint-1126/optimizer.pt +1 -1
run-0/checkpoint-1126/rng_state.pth +1 -1
run-0/checkpoint-1126/scheduler.pt +1 -1
run-0/checkpoint-1126/trainer_state.json +19 -28
run-0/checkpoint-1126/training_args.bin +1 -1
runs/Dec08_21-54-18_6ff5f1d99a72/events.out.tfevents.1733694862.6ff5f1d99a72.23.0 +2 -2

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa81e66f1e22f8c42ce00493a180958c1211c7b9164379b96b4ebc9dab28e641
 size 498625128

 version https://git-lfs.github.com/spec/v1
+oid sha256:48e3134343bbdcbaa2d0e515e448ba031cd266949636f85cccb6bf345f14ac59
 size 498625128

run-0/checkpoint-1126/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:771fe01e68dea441f9d1f001f494502b2924afcf69501e14bed2d8bbf7f7e041
 size 498625128

 version https://git-lfs.github.com/spec/v1
+oid sha256:48e3134343bbdcbaa2d0e515e448ba031cd266949636f85cccb6bf345f14ac59
 size 498625128

run-0/checkpoint-1126/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:221ee965008ac2eb3c0fc81d4f1565027a347d9d16b700f1317e8d9111eeb29b
 size 997370106

 version https://git-lfs.github.com/spec/v1
+oid sha256:32fdabad5a7c43b58ebf4e0377335271538dcd50c163715b561dde06d4217241
 size 997370106

run-0/checkpoint-1126/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:279aab4142ba2bc15a26a5c73e18f555c0949931f163d68de9a3f73e4f0c4c00
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:eed3091eb761444c21e8ad4810c28637b0a0c9c3103bbcc5f996e34a658356bc
 size 14244

run-0/checkpoint-1126/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a7033fbbed1376a1f6fd921382165bc74f54f4669790c617915759697a015e6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:638bb34837054d7ce11745cb0ee2d34d0fef98955b259d1225512652a3d97593
 size 1064

run-0/checkpoint-1126/trainer_state.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
-  "best_metric": 0.6141765793522762,
-  "best_model_checkpoint": "students_scores_model/run-0/checkpoint-1126",
   "epoch": 2.0,
   "eval_steps": 500,
   "global_step": 1126,
@@ -10,41 +10,32 @@
   "log_history": [
     {
       "epoch": 0.8880994671403197,
-      "grad_norm": 9.486105918884277,
-      "learning_rate": 1.6060239187203855e-05,
-      "loss": 1.0687,
       "step": 500
     },
     {
       "epoch": 1.0,
-      "eval_f1": 0.559496193541961,
-      "eval_loss": 1.0184900760650635,
-      "eval_runtime": 37.7502,
-      "eval_samples_per_second": 59.602,
-      "eval_steps_per_second": 7.47,
       "step": 563
     },
     {
       "epoch": 1.7761989342806395,
-      "grad_norm": 16.897672653198242,
-      "learning_rate": 9.306564171558836e-06,
-      "loss": 0.8626,
       "step": 1000
-    },
-    {
-      "epoch": 2.0,
-      "eval_f1": 0.6141765793522762,
-      "eval_loss": 0.877347469329834,
-      "eval_runtime": 37.6032,
-      "eval_samples_per_second": 59.835,
-      "eval_steps_per_second": 7.499,
-      "step": 1126
     }
   ],
   "logging_steps": 500,
-  "max_steps": 1689,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 3,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -53,7 +44,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
@@ -62,9 +53,9 @@
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": {
-    "learning_rate": 2.2813914202848873e-05,
-    "num_train_epochs": 3,
     "per_device_train_batch_size": 16,
-    "weight_decay": 0.0012064342197859815
   }
 }

 {
+  "best_metric": 0.568809428898138,
+  "best_model_checkpoint": "students_scores_model/run-0/checkpoint-563",
   "epoch": 2.0,
   "eval_steps": 500,
   "global_step": 1126,
   "log_history": [
     {
       "epoch": 0.8880994671403197,
+      "grad_norm": 33.89450454711914,
+      "learning_rate": 7.71014137309043e-06,
+      "loss": 1.0858,
       "step": 500
     },
     {
       "epoch": 1.0,
+      "eval_f1": 0.568809428898138,
+      "eval_loss": 0.9511491060256958,
+      "eval_runtime": 37.4937,
+      "eval_samples_per_second": 60.01,
+      "eval_steps_per_second": 7.521,
       "step": 563
     },
     {
       "epoch": 1.7761989342806395,
+      "grad_norm": 20.078083038330078,
+      "learning_rate": 1.5518814904303424e-06,
+      "loss": 0.876,
       "step": 1000
     }
   ],
   "logging_steps": 500,
+  "max_steps": 1126,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": {
+    "learning_rate": 1.386840125575052e-05,
+    "num_train_epochs": 2,
     "per_device_train_batch_size": 16,
+    "weight_decay": 0.010177242974562553
   }
 }

run-0/checkpoint-1126/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:86e506878344e67934428f3176c568da4bb4fdfd8b8918c71494a2c9eb08433b
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:b18f3514f71ccee1e74cafceb4cbd84c3f4bda80bd3c08496a257871a520e8eb
 size 5368

runs/Dec08_21-54-18_6ff5f1d99a72/events.out.tfevents.1733694862.6ff5f1d99a72.23.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dae4d45bd72b4cba87e8f635492bae932d6fc162479d4bf817569f99035e250d
-size 5900

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5737c556ae1cbcf28dd6308712f2d4176e5cd3f413935519c93a3983639583d
+size 6111