diaenra committed
Commit 548f8eb · verified · 1 Parent(s): 4bcb654

Training in progress, step 298, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5adf84e3dd7beb3769a77eea1152e99bbdbac831184bc3663a32de3dc9625515
+ oid sha256:892b19e9b1c6ec8c931ec31eaaea64c1f54d229ad9301361d06c23010fe58615
  size 1154870440
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f0a36e6b4cd6f4ce7b967eb5ab7b4ea01d50b724f9dce1c3caa5286a9bab778f
+ oid sha256:8a6cbc8b8810e660d48486c9cac79327a7e9d0bfe0336dab2b0b2be950dafd55
  size 2309999768
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7cba91b1bc4ddb9a8958c2818c4a9e3864b75cb3e59dbbea15849602039e27f3
+ oid sha256:ec20f60ea398d86da91aa0036eb29c67b6647d2b936f7f70a003e8aecb18b498
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:70731b6c1655d0e3656aa4aa24acc1486de561f2947adf3a592e6bdf8c91a623
+ oid sha256:7299d6d674e2331f7fef6ad21b861c3a91e48af7b3ecf10af405659140b65185
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.8013411567476949,
+ "epoch": 0.9991617770326907,
  "eval_steps": 500,
- "global_step": 239,
+ "global_step": 298,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1680,6 +1680,419 @@
  "learning_rate": 2.0354603547267985e-05,
  "loss": 0.4991,
  "step": 239
+ },
+ {
+ "epoch": 0.8046940486169321,
+ "grad_norm": 15.609128952026367,
+ "learning_rate": 1.9719515643116674e-05,
+ "loss": 0.885,
+ "step": 240
+ },
+ {
+ "epoch": 0.8080469404861693,
+ "grad_norm": 14.140607833862305,
+ "learning_rate": 1.9092050688969738e-05,
+ "loss": 0.6477,
+ "step": 241
+ },
+ {
+ "epoch": 0.8113998323554066,
+ "grad_norm": 8.173004150390625,
+ "learning_rate": 1.847236664577389e-05,
+ "loss": 0.3195,
+ "step": 242
+ },
+ {
+ "epoch": 0.8147527242246437,
+ "grad_norm": 12.263628959655762,
+ "learning_rate": 1.7860619515673033e-05,
+ "loss": 0.3825,
+ "step": 243
+ },
+ {
+ "epoch": 0.818105616093881,
+ "grad_norm": 9.158538818359375,
+ "learning_rate": 1.725696330273575e-05,
+ "loss": 0.5394,
+ "step": 244
+ },
+ {
+ "epoch": 0.8214585079631181,
+ "grad_norm": 11.503657341003418,
+ "learning_rate": 1.6661549974185424e-05,
+ "loss": 0.5017,
+ "step": 245
+ },
+ {
+ "epoch": 0.8248113998323554,
+ "grad_norm": 11.45801830291748,
+ "learning_rate": 1.60745294221434e-05,
+ "loss": 0.5327,
+ "step": 246
+ },
+ {
+ "epoch": 0.8281642917015927,
+ "grad_norm": 14.59035873413086,
+ "learning_rate": 1.549604942589441e-05,
+ "loss": 0.6128,
+ "step": 247
+ },
+ {
+ "epoch": 0.8315171835708298,
+ "grad_norm": 11.337044715881348,
+ "learning_rate": 1.4926255614683932e-05,
+ "loss": 0.4564,
+ "step": 248
+ },
+ {
+ "epoch": 0.8348700754400671,
+ "grad_norm": 9.969059944152832,
+ "learning_rate": 1.4365291431056871e-05,
+ "loss": 0.7156,
+ "step": 249
+ },
+ {
+ "epoch": 0.8382229673093042,
+ "grad_norm": 10.448427200317383,
+ "learning_rate": 1.3813298094746491e-05,
+ "loss": 0.5685,
+ "step": 250
+ },
+ {
+ "epoch": 0.8415758591785415,
+ "grad_norm": 11.464609146118164,
+ "learning_rate": 1.327041456712334e-05,
+ "loss": 0.6357,
+ "step": 251
+ },
+ {
+ "epoch": 0.8449287510477788,
+ "grad_norm": 12.060856819152832,
+ "learning_rate": 1.2736777516212266e-05,
+ "loss": 0.7539,
+ "step": 252
+ },
+ {
+ "epoch": 0.8482816429170159,
+ "grad_norm": 12.243000984191895,
+ "learning_rate": 1.2212521282287092e-05,
+ "loss": 0.7645,
+ "step": 253
+ },
+ {
+ "epoch": 0.8516345347862532,
+ "grad_norm": 11.656434059143066,
+ "learning_rate": 1.1697777844051105e-05,
+ "loss": 0.708,
+ "step": 254
+ },
+ {
+ "epoch": 0.8549874266554903,
+ "grad_norm": 12.392927169799805,
+ "learning_rate": 1.1192676785412154e-05,
+ "loss": 0.7513,
+ "step": 255
+ },
+ {
+ "epoch": 0.8583403185247276,
+ "grad_norm": 9.757222175598145,
+ "learning_rate": 1.0697345262860636e-05,
+ "loss": 0.6079,
+ "step": 256
+ },
+ {
+ "epoch": 0.8616932103939648,
+ "grad_norm": 8.159704208374023,
+ "learning_rate": 1.021190797345839e-05,
+ "loss": 0.2301,
+ "step": 257
+ },
+ {
+ "epoch": 0.865046102263202,
+ "grad_norm": 7.381707668304443,
+ "learning_rate": 9.73648712344707e-06,
+ "loss": 0.337,
+ "step": 258
+ },
+ {
+ "epoch": 0.8683989941324393,
+ "grad_norm": 9.870096206665039,
+ "learning_rate": 9.271202397483215e-06,
+ "loss": 0.2997,
+ "step": 259
+ },
+ {
+ "epoch": 0.8717518860016764,
+ "grad_norm": 8.603653907775879,
+ "learning_rate": 8.816170928508365e-06,
+ "loss": 0.2499,
+ "step": 260
+ },
+ {
+ "epoch": 0.8751047778709137,
+ "grad_norm": 6.676860809326172,
+ "learning_rate": 8.371507268261437e-06,
+ "loss": 0.3058,
+ "step": 261
+ },
+ {
+ "epoch": 0.8784576697401508,
+ "grad_norm": 8.418916702270508,
+ "learning_rate": 7.937323358440935e-06,
+ "loss": 0.2895,
+ "step": 262
+ },
+ {
+ "epoch": 0.8818105616093881,
+ "grad_norm": 10.64456558227539,
+ "learning_rate": 7.513728502524286e-06,
+ "loss": 0.3863,
+ "step": 263
+ },
+ {
+ "epoch": 0.8851634534786254,
+ "grad_norm": 7.481652736663818,
+ "learning_rate": 7.100829338251147e-06,
+ "loss": 0.2319,
+ "step": 264
+ },
+ {
+ "epoch": 0.8885163453478625,
+ "grad_norm": 7.484499454498291,
+ "learning_rate": 6.698729810778065e-06,
+ "loss": 0.1861,
+ "step": 265
+ },
+ {
+ "epoch": 0.8918692372170998,
+ "grad_norm": 6.579418659210205,
+ "learning_rate": 6.3075311465107535e-06,
+ "loss": 0.2557,
+ "step": 266
+ },
+ {
+ "epoch": 0.8952221290863369,
+ "grad_norm": 6.663677215576172,
+ "learning_rate": 5.927331827620903e-06,
+ "loss": 0.3708,
+ "step": 267
+ },
+ {
+ "epoch": 0.8985750209555742,
+ "grad_norm": 9.156440734863281,
+ "learning_rate": 5.558227567253832e-06,
+ "loss": 0.3865,
+ "step": 268
+ },
+ {
+ "epoch": 0.9019279128248114,
+ "grad_norm": 5.36343240737915,
+ "learning_rate": 5.200311285433213e-06,
+ "loss": 0.2776,
+ "step": 269
+ },
+ {
+ "epoch": 0.9052808046940486,
+ "grad_norm": 7.757360935211182,
+ "learning_rate": 4.853673085668947e-06,
+ "loss": 0.335,
+ "step": 270
+ },
+ {
+ "epoch": 0.9086336965632859,
+ "grad_norm": 12.774531364440918,
+ "learning_rate": 4.5184002322740785e-06,
+ "loss": 0.3653,
+ "step": 271
+ },
+ {
+ "epoch": 0.911986588432523,
+ "grad_norm": 8.137080192565918,
+ "learning_rate": 4.19457712839652e-06,
+ "loss": 0.392,
+ "step": 272
+ },
+ {
+ "epoch": 0.9153394803017603,
+ "grad_norm": 9.441322326660156,
+ "learning_rate": 3.8822852947709375e-06,
+ "loss": 0.3059,
+ "step": 273
+ },
+ {
+ "epoch": 0.9186923721709975,
+ "grad_norm": 5.269786834716797,
+ "learning_rate": 3.581603349196372e-06,
+ "loss": 0.2275,
+ "step": 274
+ },
+ {
+ "epoch": 0.9220452640402347,
+ "grad_norm": 9.277483940124512,
+ "learning_rate": 3.2926069867446675e-06,
+ "loss": 0.335,
+ "step": 275
+ },
+ {
+ "epoch": 0.9253981559094719,
+ "grad_norm": 9.3627290725708,
+ "learning_rate": 3.0153689607045845e-06,
+ "loss": 0.4954,
+ "step": 276
+ },
+ {
+ "epoch": 0.9287510477787091,
+ "grad_norm": 7.629518508911133,
+ "learning_rate": 2.7499590642665774e-06,
+ "loss": 0.2234,
+ "step": 277
+ },
+ {
+ "epoch": 0.9321039396479464,
+ "grad_norm": 5.571048736572266,
+ "learning_rate": 2.496444112952734e-06,
+ "loss": 0.3485,
+ "step": 278
+ },
+ {
+ "epoch": 0.9354568315171836,
+ "grad_norm": 18.15276336669922,
+ "learning_rate": 2.2548879277963064e-06,
+ "loss": 0.3434,
+ "step": 279
+ },
+ {
+ "epoch": 0.9388097233864208,
+ "grad_norm": 14.836206436157227,
+ "learning_rate": 2.0253513192751373e-06,
+ "loss": 0.3263,
+ "step": 280
+ },
+ {
+ "epoch": 0.942162615255658,
+ "grad_norm": 13.923564910888672,
+ "learning_rate": 1.807892072002898e-06,
+ "loss": 0.4678,
+ "step": 281
+ },
+ {
+ "epoch": 0.9455155071248952,
+ "grad_norm": 15.387225151062012,
+ "learning_rate": 1.6025649301821876e-06,
+ "loss": 0.4785,
+ "step": 282
+ },
+ {
+ "epoch": 0.9488683989941324,
+ "grad_norm": 13.055351257324219,
+ "learning_rate": 1.4094215838229176e-06,
+ "loss": 0.563,
+ "step": 283
+ },
+ {
+ "epoch": 0.9522212908633697,
+ "grad_norm": 17.376829147338867,
+ "learning_rate": 1.2285106557296477e-06,
+ "loss": 0.3891,
+ "step": 284
+ },
+ {
+ "epoch": 0.9555741827326069,
+ "grad_norm": 17.277982711791992,
+ "learning_rate": 1.0598776892610685e-06,
+ "loss": 0.5198,
+ "step": 285
+ },
+ {
+ "epoch": 0.9589270746018441,
+ "grad_norm": 11.45787239074707,
+ "learning_rate": 9.035651368646648e-07,
+ "loss": 0.4912,
+ "step": 286
+ },
+ {
+ "epoch": 0.9622799664710813,
+ "grad_norm": 12.024084091186523,
+ "learning_rate": 7.596123493895991e-07,
+ "loss": 0.4365,
+ "step": 287
+ },
+ {
+ "epoch": 0.9656328583403185,
+ "grad_norm": 15.519173622131348,
+ "learning_rate": 6.280555661802856e-07,
+ "loss": 0.6457,
+ "step": 288
+ },
+ {
+ "epoch": 0.9689857502095558,
+ "grad_norm": 15.936901092529297,
+ "learning_rate": 5.089279059533658e-07,
+ "loss": 0.7402,
+ "step": 289
+ },
+ {
+ "epoch": 0.972338642078793,
+ "grad_norm": 14.855793952941895,
+ "learning_rate": 4.02259358460233e-07,
+ "loss": 0.5426,
+ "step": 290
+ },
+ {
+ "epoch": 0.9756915339480302,
+ "grad_norm": 17.52886199951172,
+ "learning_rate": 3.080767769372939e-07,
+ "loss": 0.5786,
+ "step": 291
+ },
+ {
+ "epoch": 0.9790444258172674,
+ "grad_norm": 14.614376068115234,
+ "learning_rate": 2.2640387134577058e-07,
+ "loss": 0.4134,
+ "step": 292
+ },
+ {
+ "epoch": 0.9823973176865046,
+ "grad_norm": 12.858384132385254,
+ "learning_rate": 1.5726120240288634e-07,
+ "loss": 0.9686,
+ "step": 293
+ },
+ {
+ "epoch": 0.9857502095557418,
+ "grad_norm": 14.443262100219727,
+ "learning_rate": 1.0066617640578368e-07,
+ "loss": 0.5455,
+ "step": 294
+ },
+ {
+ "epoch": 0.989103101424979,
+ "grad_norm": 14.196720123291016,
+ "learning_rate": 5.663304084960186e-08,
+ "loss": 0.4852,
+ "step": 295
+ },
+ {
+ "epoch": 0.9924559932942163,
+ "grad_norm": 13.345819473266602,
+ "learning_rate": 2.5172880840745873e-08,
+ "loss": 0.4879,
+ "step": 296
+ },
+ {
+ "epoch": 0.9958088851634534,
+ "grad_norm": 4.6811842918396,
+ "learning_rate": 6.293616306246586e-09,
+ "loss": 0.2391,
+ "step": 297
+ },
+ {
+ "epoch": 0.9991617770326907,
+ "grad_norm": 7.340142250061035,
+ "learning_rate": 0.0,
+ "loss": 0.4579,
+ "step": 298
  }
  ],
  "logging_steps": 1,
@@ -1694,12 +2107,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 1.6093164856777114e+17,
+ "total_flos": 1.998363176038564e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null