neuralwonderland committed on
Commit
648f4fa
·
verified ·
1 Parent(s): f460215

Training in progress, step 4500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adef2194e28a0e80a5d89be6ca9a1cb8132a1216977b90d6e6e6fdda8ef93136
3
  size 69527352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92972f511e5eaf86803bd9b1c88f96386c1661012d60534c0327e42dab3fd264
3
  size 69527352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c0289d6836936e1e1d4d1a00c26faada58bd772c60da91219dcb039a4acdd96
3
  size 139313554
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb4b509759ad8756cf8ce1affd78d65013c40a85d5292009494d04b60a432580
3
  size 139313554
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c36d18edd5105b91142175220fd394d193785a170a78cb6528ab60f8815462b
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80e19b05191e0292506bdab7670f3de79b1841203d7c1b0fb1140428596feefe
3
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2136a062516a1e5c179a267ecbd6b2d49f704c3f02f0e487b4db7582df66bbb
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16bf4acbbdf4243a5b30121f973d426853acaed3d0caa7d9deb065ee076e1e1d
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7707881927490234,
3
- "best_model_checkpoint": "./output/checkpoint-4350",
4
- "epoch": 0.5407757334659373,
5
  "eval_steps": 150,
6
- "global_step": 4350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3284,6 +3284,119 @@
3284
  "eval_samples_per_second": 8.742,
3285
  "eval_steps_per_second": 8.742,
3286
  "step": 4350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3287
  }
3288
  ],
3289
  "logging_steps": 10,
@@ -3303,7 +3416,7 @@
3303
  "attributes": {}
3304
  }
3305
  },
3306
- "total_flos": 1.5140076638825472e+17,
3307
  "train_batch_size": 16,
3308
  "trial_name": null,
3309
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7698713541030884,
3
+ "best_model_checkpoint": "./output/checkpoint-4500",
4
+ "epoch": 0.5594231725509696,
5
  "eval_steps": 150,
6
+ "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3284
  "eval_samples_per_second": 8.742,
3285
  "eval_steps_per_second": 8.742,
3286
  "step": 4350
3287
+ },
3288
+ {
3289
+ "epoch": 0.5420188960716061,
3290
+ "grad_norm": 1.6083730459213257,
3291
+ "learning_rate": 5.188184706397182e-06,
3292
+ "loss": 0.6078,
3293
+ "step": 4360
3294
+ },
3295
+ {
3296
+ "epoch": 0.543262058677275,
3297
+ "grad_norm": 1.7973166704177856,
3298
+ "learning_rate": 5.029514215534339e-06,
3299
+ "loss": 0.5657,
3300
+ "step": 4370
3301
+ },
3302
+ {
3303
+ "epoch": 0.5445052212829438,
3304
+ "grad_norm": 1.397307276725769,
3305
+ "learning_rate": 4.873206111245594e-06,
3306
+ "loss": 0.5861,
3307
+ "step": 4380
3308
+ },
3309
+ {
3310
+ "epoch": 0.5457483838886126,
3311
+ "grad_norm": 1.766788363456726,
3312
+ "learning_rate": 4.719266818744912e-06,
3313
+ "loss": 0.5335,
3314
+ "step": 4390
3315
+ },
3316
+ {
3317
+ "epoch": 0.5469915464942815,
3318
+ "grad_norm": 2.368110418319702,
3319
+ "learning_rate": 4.567702665873648e-06,
3320
+ "loss": 0.6134,
3321
+ "step": 4400
3322
+ },
3323
+ {
3324
+ "epoch": 0.5482347090999503,
3325
+ "grad_norm": 1.4024748802185059,
3326
+ "learning_rate": 4.418519882840505e-06,
3327
+ "loss": 0.5903,
3328
+ "step": 4410
3329
+ },
3330
+ {
3331
+ "epoch": 0.5494778717056191,
3332
+ "grad_norm": 1.45235013961792,
3333
+ "learning_rate": 4.271724601965371e-06,
3334
+ "loss": 0.6008,
3335
+ "step": 4420
3336
+ },
3337
+ {
3338
+ "epoch": 0.5507210343112879,
3339
+ "grad_norm": 1.5979630947113037,
3340
+ "learning_rate": 4.127322857427306e-06,
3341
+ "loss": 0.5718,
3342
+ "step": 4430
3343
+ },
3344
+ {
3345
+ "epoch": 0.5519641969169568,
3346
+ "grad_norm": 1.427748203277588,
3347
+ "learning_rate": 3.985320585016425e-06,
3348
+ "loss": 0.5916,
3349
+ "step": 4440
3350
+ },
3351
+ {
3352
+ "epoch": 0.5532073595226256,
3353
+ "grad_norm": 1.756362795829773,
3354
+ "learning_rate": 3.845723621889973e-06,
3355
+ "loss": 0.5969,
3356
+ "step": 4450
3357
+ },
3358
+ {
3359
+ "epoch": 0.5544505221282944,
3360
+ "grad_norm": 1.447805404663086,
3361
+ "learning_rate": 3.7085377063323447e-06,
3362
+ "loss": 0.5322,
3363
+ "step": 4460
3364
+ },
3365
+ {
3366
+ "epoch": 0.5556936847339632,
3367
+ "grad_norm": 1.3792946338653564,
3368
+ "learning_rate": 3.5737684775191887e-06,
3369
+ "loss": 0.5492,
3370
+ "step": 4470
3371
+ },
3372
+ {
3373
+ "epoch": 0.556936847339632,
3374
+ "grad_norm": 1.699859619140625,
3375
+ "learning_rate": 3.441421475285679e-06,
3376
+ "loss": 0.5606,
3377
+ "step": 4480
3378
+ },
3379
+ {
3380
+ "epoch": 0.5581800099453008,
3381
+ "grad_norm": 1.4851022958755493,
3382
+ "learning_rate": 3.3115021398986768e-06,
3383
+ "loss": 0.6094,
3384
+ "step": 4490
3385
+ },
3386
+ {
3387
+ "epoch": 0.5594231725509696,
3388
+ "grad_norm": 0.9180851578712463,
3389
+ "learning_rate": 3.18401581183321e-06,
3390
+ "loss": 0.5229,
3391
+ "step": 4500
3392
+ },
3393
+ {
3394
+ "epoch": 0.5594231725509696,
3395
+ "eval_loss": 0.7698713541030884,
3396
+ "eval_runtime": 55.7444,
3397
+ "eval_samples_per_second": 8.97,
3398
+ "eval_steps_per_second": 8.97,
3399
+ "step": 4500
3400
  }
3401
  ],
3402
  "logging_steps": 10,
 
3416
  "attributes": {}
3417
  }
3418
  },
3419
+ "total_flos": 1.5666787302912e+17,
3420
  "train_batch_size": 16,
3421
  "trial_name": null,
3422
  "trial_params": null