neuralwonderland committed on
Commit
a584e1f
·
verified ·
1 Parent(s): 2982a78

Training in progress, step 4650, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b6a6a42ffc6d380e2565f1fc5063348a1e34156d7e515f8f7da05f46e62ad60
3
  size 524363632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d45b75e8780ea3804ea9254533607010c2e8b727e3c235634fa4b6050e8698cf
3
  size 524363632
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e78a087bb6220991244d4fe57654663db01a4e71ac79cbc7cb3e84d260f51fe1
3
- size 1049049442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b221d8f9f40f095d22c6d417ca3a0af75953108b073407e6f96b0d6ddfd2643
3
+ size 1049049378
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ad3318d3c187ce725fc1b6b7085bd28d07766f6a1219cb8fb8f59b2625444c2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0aef250a97cd4703312dac201660d7f97a09a6e15d50413621370c123af9698
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3803bd21b70ba2e397dcc504f146e8f1f5465da72c7ce9f40dd721afbea3d107
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:657bbe4e559d5f9e7581fbbb12237706f640d94bb8a67370d9ae890c56e7c61f
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.1950809955596924,
3
- "best_model_checkpoint": "./output/checkpoint-4500",
4
- "epoch": 0.20156774916013437,
5
  "eval_steps": 150,
6
- "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3397,6 +3397,119 @@
3397
  "eval_samples_per_second": 9.665,
3398
  "eval_steps_per_second": 9.665,
3399
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3400
  }
3401
  ],
3402
  "logging_steps": 10,
@@ -3416,7 +3529,7 @@
3416
  "attributes": {}
3417
  }
3418
  },
3419
- "total_flos": 5.818050367543296e+17,
3420
  "train_batch_size": 4,
3421
  "trial_name": null,
3422
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.1947814226150513,
3
+ "best_model_checkpoint": "./output/checkpoint-4650",
4
+ "epoch": 0.20828667413213886,
5
  "eval_steps": 150,
6
+ "global_step": 4650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3397
  "eval_samples_per_second": 9.665,
3398
  "eval_steps_per_second": 9.665,
3399
  "step": 4500
3400
+ },
3401
+ {
3402
+ "epoch": 0.20201567749160135,
3403
+ "grad_norm": 4.709386348724365,
3404
+ "learning_rate": 1.8353806389317428e-07,
3405
+ "loss": 0.9829,
3406
+ "step": 4510
3407
+ },
3408
+ {
3409
+ "epoch": 0.2024636058230683,
3410
+ "grad_norm": 5.23099946975708,
3411
+ "learning_rate": 1.761817823576731e-07,
3412
+ "loss": 1.1149,
3413
+ "step": 4520
3414
+ },
3415
+ {
3416
+ "epoch": 0.20291153415453528,
3417
+ "grad_norm": 3.4107179641723633,
3418
+ "learning_rate": 1.6897240649141125e-07,
3419
+ "loss": 0.9822,
3420
+ "step": 4530
3421
+ },
3422
+ {
3423
+ "epoch": 0.20335946248600223,
3424
+ "grad_norm": 3.951052188873291,
3425
+ "learning_rate": 1.619102326435923e-07,
3426
+ "loss": 1.2333,
3427
+ "step": 4540
3428
+ },
3429
+ {
3430
+ "epoch": 0.2038073908174692,
3431
+ "grad_norm": 4.30809211730957,
3432
+ "learning_rate": 1.5499555111252285e-07,
3433
+ "loss": 1.0641,
3434
+ "step": 4550
3435
+ },
3436
+ {
3437
+ "epoch": 0.20425531914893616,
3438
+ "grad_norm": 4.1274189949035645,
3439
+ "learning_rate": 1.4822864613367766e-07,
3440
+ "loss": 1.0962,
3441
+ "step": 4560
3442
+ },
3443
+ {
3444
+ "epoch": 0.20470324748040314,
3445
+ "grad_norm": 6.046044826507568,
3446
+ "learning_rate": 1.4160979586801724e-07,
3447
+ "loss": 1.0241,
3448
+ "step": 4570
3449
+ },
3450
+ {
3451
+ "epoch": 0.2051511758118701,
3452
+ "grad_norm": 4.066288471221924,
3453
+ "learning_rate": 1.3513927239055036e-07,
3454
+ "loss": 0.9061,
3455
+ "step": 4580
3456
+ },
3457
+ {
3458
+ "epoch": 0.20559910414333707,
3459
+ "grad_norm": 3.9250218868255615,
3460
+ "learning_rate": 1.2881734167915425e-07,
3461
+ "loss": 1.1666,
3462
+ "step": 4590
3463
+ },
3464
+ {
3465
+ "epoch": 0.20604703247480402,
3466
+ "grad_norm": 4.965548515319824,
3467
+ "learning_rate": 1.2264426360363956e-07,
3468
+ "loss": 0.8048,
3469
+ "step": 4600
3470
+ },
3471
+ {
3472
+ "epoch": 0.206494960806271,
3473
+ "grad_norm": 5.192389965057373,
3474
+ "learning_rate": 1.1662029191506775e-07,
3475
+ "loss": 0.9869,
3476
+ "step": 4610
3477
+ },
3478
+ {
3479
+ "epoch": 0.20694288913773795,
3480
+ "grad_norm": 4.953862190246582,
3481
+ "learning_rate": 1.107456742353201e-07,
3482
+ "loss": 1.0042,
3483
+ "step": 4620
3484
+ },
3485
+ {
3486
+ "epoch": 0.20739081746920493,
3487
+ "grad_norm": 4.955436706542969,
3488
+ "learning_rate": 1.0502065204692062e-07,
3489
+ "loss": 1.101,
3490
+ "step": 4630
3491
+ },
3492
+ {
3493
+ "epoch": 0.20783874580067188,
3494
+ "grad_norm": 2.5195674896240234,
3495
+ "learning_rate": 9.94454606831076e-08,
3496
+ "loss": 0.9542,
3497
+ "step": 4640
3498
+ },
3499
+ {
3500
+ "epoch": 0.20828667413213886,
3501
+ "grad_norm": 4.142997741699219,
3502
+ "learning_rate": 9.402032931816144e-08,
3503
+ "loss": 1.1318,
3504
+ "step": 4650
3505
+ },
3506
+ {
3507
+ "epoch": 0.20828667413213886,
3508
+ "eval_loss": 1.1947814226150513,
3509
+ "eval_runtime": 51.8063,
3510
+ "eval_samples_per_second": 9.651,
3511
+ "eval_steps_per_second": 9.651,
3512
+ "step": 4650
3513
  }
3514
  ],
3515
  "logging_steps": 10,
 
3529
  "attributes": {}
3530
  }
3531
  },
3532
+ "total_flos": 6.013945095856128e+17,
3533
  "train_batch_size": 4,
3534
  "trial_name": null,
3535
  "trial_params": null