irishprancer commited on
Commit
29470bb
·
verified ·
1 Parent(s): 6065e8d

Training in progress, step 4650, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3514c73345f69d0c9842f4ae7671f25a434e86505a1a2eb1ce04573fead9d03
3
  size 1482788592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e892cb1a4cbe178b5793fc4bc7b06df7ba09245ba7288ee2bc0ba93be87167f8
3
  size 1482788592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d6ee7f3a10460f3ffb6333f04ca0412b319601b5d8bf66fb361bd45ab5758c2
3
  size 2897966842
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06fb50533439b160e16eaa506c92e9c15f37289f74228471e7951b6d1e0ee78c
3
  size 2897966842
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2483a85e0c4bab047f3ed01375369019a5f42e1b495801c626999c441bf2b7d8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30d893290e5e1fdb574b3915ebc94cb574860f01edf82e3dbe4acb2cc67e7660
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f72d12e08f2981b12196a00ff48fe5cac0ba4d9d1aa54f91464a195ecde87c8
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:931d38342b692c160fcb90f4d9acb8e6f6634f499984cdd5b99b0563194d400a
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.08717679977417,
3
  "best_model_checkpoint": "./output/checkpoint-4500",
4
- "epoch": 0.2786205188533218,
5
  "eval_steps": 150,
6
- "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3397,6 +3397,119 @@
3397
  "eval_samples_per_second": 9.727,
3398
  "eval_steps_per_second": 9.727,
3399
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3400
  }
3401
  ],
3402
  "logging_steps": 10,
@@ -3416,7 +3529,7 @@
3416
  "attributes": {}
3417
  }
3418
  },
3419
- "total_flos": 3.118865314362163e+17,
3420
  "train_batch_size": 4,
3421
  "trial_name": null,
3422
  "trial_params": null
 
1
  {
2
  "best_metric": 2.08717679977417,
3
  "best_model_checkpoint": "./output/checkpoint-4500",
4
+ "epoch": 0.28790786948176583,
5
  "eval_steps": 150,
6
+ "global_step": 4650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3397
  "eval_samples_per_second": 9.727,
3398
  "eval_steps_per_second": 9.727,
3399
  "step": 4500
3400
+ },
3401
+ {
3402
+ "epoch": 0.2792396755618847,
3403
+ "grad_norm": 30.00826644897461,
3404
+ "learning_rate": 5.472047830984499e-07,
3405
+ "loss": 1.8833,
3406
+ "step": 4510
3407
+ },
3408
+ {
3409
+ "epoch": 0.27985883227044767,
3410
+ "grad_norm": 25.873231887817383,
3411
+ "learning_rate": 5.252725889984403e-07,
3412
+ "loss": 1.6589,
3413
+ "step": 4520
3414
+ },
3415
+ {
3416
+ "epoch": 0.2804779889790106,
3417
+ "grad_norm": 35.64846420288086,
3418
+ "learning_rate": 5.037783829820298e-07,
3419
+ "loss": 1.7872,
3420
+ "step": 4530
3421
+ },
3422
+ {
3423
+ "epoch": 0.28109714568757355,
3424
+ "grad_norm": 18.542455673217773,
3425
+ "learning_rate": 4.827230485918372e-07,
3426
+ "loss": 1.7618,
3427
+ "step": 4540
3428
+ },
3429
+ {
3430
+ "epoch": 0.2817163023961365,
3431
+ "grad_norm": 16.17365837097168,
3432
+ "learning_rate": 4.6210745133019236e-07,
3433
+ "loss": 1.4652,
3434
+ "step": 4550
3435
+ },
3436
+ {
3437
+ "epoch": 0.2823354591046994,
3438
+ "grad_norm": 33.95541763305664,
3439
+ "learning_rate": 4.419324386235529e-07,
3440
+ "loss": 1.7503,
3441
+ "step": 4560
3442
+ },
3443
+ {
3444
+ "epoch": 0.2829546158132623,
3445
+ "grad_norm": 18.06041145324707,
3446
+ "learning_rate": 4.2219883978767386e-07,
3447
+ "loss": 1.9819,
3448
+ "step": 4570
3449
+ },
3450
+ {
3451
+ "epoch": 0.28357377252182525,
3452
+ "grad_norm": 27.695892333984375,
3453
+ "learning_rate": 4.029074659935082e-07,
3454
+ "loss": 1.7574,
3455
+ "step": 4580
3456
+ },
3457
+ {
3458
+ "epoch": 0.2841929292303882,
3459
+ "grad_norm": 37.04735565185547,
3460
+ "learning_rate": 3.8405911023387444e-07,
3461
+ "loss": 1.7715,
3462
+ "step": 4590
3463
+ },
3464
+ {
3465
+ "epoch": 0.28481208593895113,
3466
+ "grad_norm": 27.74335289001465,
3467
+ "learning_rate": 3.6565454729085526e-07,
3468
+ "loss": 1.9633,
3469
+ "step": 4600
3470
+ },
3471
+ {
3472
+ "epoch": 0.28543124264751407,
3473
+ "grad_norm": 23.256072998046875,
3474
+ "learning_rate": 3.4769453370394753e-07,
3475
+ "loss": 1.8392,
3476
+ "step": 4610
3477
+ },
3478
+ {
3479
+ "epoch": 0.286050399356077,
3480
+ "grad_norm": 24.18708038330078,
3481
+ "learning_rate": 3.301798077389637e-07,
3482
+ "loss": 1.8558,
3483
+ "step": 4620
3484
+ },
3485
+ {
3486
+ "epoch": 0.28666955606463995,
3487
+ "grad_norm": 33.63625717163086,
3488
+ "learning_rate": 3.1311108935768926e-07,
3489
+ "loss": 1.9262,
3490
+ "step": 4630
3491
+ },
3492
+ {
3493
+ "epoch": 0.2872887127732029,
3494
+ "grad_norm": 23.810638427734375,
3495
+ "learning_rate": 2.964890801882817e-07,
3496
+ "loss": 1.7218,
3497
+ "step": 4640
3498
+ },
3499
+ {
3500
+ "epoch": 0.28790786948176583,
3501
+ "grad_norm": 33.80169677734375,
3502
+ "learning_rate": 2.8031446349643393e-07,
3503
+ "loss": 1.9988,
3504
+ "step": 4650
3505
+ },
3506
+ {
3507
+ "epoch": 0.28790786948176583,
3508
+ "eval_loss": 2.0872082710266113,
3509
+ "eval_runtime": 49.569,
3510
+ "eval_samples_per_second": 10.087,
3511
+ "eval_steps_per_second": 10.087,
3512
+ "step": 4650
3513
  }
3514
  ],
3515
  "logging_steps": 10,
 
3529
  "attributes": {}
3530
  }
3531
  },
3532
+ "total_flos": 3.228632494763213e+17,
3533
  "train_batch_size": 4,
3534
  "trial_name": null,
3535
  "trial_params": null