neuralwonderland commited on
Commit
ad13138
·
verified ·
1 Parent(s): 3144dce

Training in progress, step 4950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd6e27722a4556ce816cc06dea2192deb470b4c7575547d0385f0f64e0582419
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00d190fa03a8b744759e3dc60d422da2e78988c51a3ed128830b6205a9c696be
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98f47e22f9bc7ea2f2a36cb3cf34559618d78e0cc231ffa15b9fa08e4dfaf5e9
3
  size 671467026
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:168fd07a7bfd886ba082b420f079e60065cd81d463cf839e7eea49403cd4a717
3
  size 671467026
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3db1b1dc6380b47fa5473e08619f5033b5e9db1f7a5928be021a489f2d2d5df1
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9a20ffde0bcc0378377fd456c0fc9ed90bdc5e0f81640a306edad6f8e3d388d
3
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d965ba2c87314f68867b110394668e2ae1d68cbd5affed6052b0bbf6bcf4700e
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:580af5054c7ccbfc854ec8423aba1a6359c216121b4aaa182dffa1292869253d
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.0024483543820679188,
3
- "best_model_checkpoint": "./output/checkpoint-4800",
4
- "epoch": 1.304702364773036,
5
  "eval_steps": 150,
6
- "global_step": 4800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3623,6 +3623,119 @@
3623
  "eval_samples_per_second": 9.09,
3624
  "eval_steps_per_second": 9.09,
3625
  "step": 4800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3626
  }
3627
  ],
3628
  "logging_steps": 10,
@@ -3642,7 +3755,7 @@
3642
  "attributes": {}
3643
  }
3644
  },
3645
- "total_flos": 1.1123209269885665e+18,
3646
  "train_batch_size": 16,
3647
  "trial_name": null,
3648
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.002407131949439645,
3
+ "best_model_checkpoint": "./output/checkpoint-4950",
4
+ "epoch": 1.3454743136721936,
5
  "eval_steps": 150,
6
+ "global_step": 4950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3623
  "eval_samples_per_second": 9.09,
3624
  "eval_steps_per_second": 9.09,
3625
  "step": 4800
3626
+ },
3627
+ {
3628
+ "epoch": 1.3074204946996466,
3629
+ "grad_norm": 2.8233747482299805,
3630
+ "learning_rate": 1.5284161868940867e-07,
3631
+ "loss": 0.0025,
3632
+ "step": 4810
3633
+ },
3634
+ {
3635
+ "epoch": 1.3101386246262572,
3636
+ "grad_norm": 2.893601179122925,
3637
+ "learning_rate": 1.371938027303652e-07,
3638
+ "loss": 0.0079,
3639
+ "step": 4820
3640
+ },
3641
+ {
3642
+ "epoch": 1.3128567545528675,
3643
+ "grad_norm": 0.9337513446807861,
3644
+ "learning_rate": 1.223881602388429e-07,
3645
+ "loss": 0.008,
3646
+ "step": 4830
3647
+ },
3648
+ {
3649
+ "epoch": 1.3155748844794781,
3650
+ "grad_norm": 0.9524758458137512,
3651
+ "learning_rate": 1.08425299816816e-07,
3652
+ "loss": 0.0051,
3653
+ "step": 4840
3654
+ },
3655
+ {
3656
+ "epoch": 1.3182930144060885,
3657
+ "grad_norm": 3.0172386169433594,
3658
+ "learning_rate": 9.53057954227865e-08,
3659
+ "loss": 0.0042,
3660
+ "step": 4850
3661
+ },
3662
+ {
3663
+ "epoch": 1.321011144332699,
3664
+ "grad_norm": 1.300944447517395,
3665
+ "learning_rate": 8.303018634819421e-08,
3666
+ "loss": 0.0041,
3667
+ "step": 4860
3668
+ },
3669
+ {
3670
+ "epoch": 1.3237292742593096,
3671
+ "grad_norm": 0.5587581396102905,
3672
+ "learning_rate": 7.159897719524897e-08,
3673
+ "loss": 0.0043,
3674
+ "step": 4870
3675
+ },
3676
+ {
3677
+ "epoch": 1.3264474041859202,
3678
+ "grad_norm": 2.783456563949585,
3679
+ "learning_rate": 6.101263785618691e-08,
3680
+ "loss": 0.0048,
3681
+ "step": 4880
3682
+ },
3683
+ {
3684
+ "epoch": 1.3291655341125306,
3685
+ "grad_norm": 1.9860410690307617,
3686
+ "learning_rate": 5.127160349395123e-08,
3687
+ "loss": 0.0052,
3688
+ "step": 4890
3689
+ },
3690
+ {
3691
+ "epoch": 1.331883664039141,
3692
+ "grad_norm": 3.2289083003997803,
3693
+ "learning_rate": 4.2376274524313086e-08,
3694
+ "loss": 0.0033,
3695
+ "step": 4900
3696
+ },
3697
+ {
3698
+ "epoch": 1.3346017939657515,
3699
+ "grad_norm": 0.5823839902877808,
3700
+ "learning_rate": 3.432701659940772e-08,
3701
+ "loss": 0.0068,
3702
+ "step": 4910
3703
+ },
3704
+ {
3705
+ "epoch": 1.337319923892362,
3706
+ "grad_norm": 1.3258577585220337,
3707
+ "learning_rate": 2.7124160592697094e-08,
3708
+ "loss": 0.0042,
3709
+ "step": 4920
3710
+ },
3711
+ {
3712
+ "epoch": 1.3400380538189727,
3713
+ "grad_norm": 2.034165620803833,
3714
+ "learning_rate": 2.0768002585386592e-08,
3715
+ "loss": 0.0053,
3716
+ "step": 4930
3717
+ },
3718
+ {
3719
+ "epoch": 1.342756183745583,
3720
+ "grad_norm": 2.6955316066741943,
3721
+ "learning_rate": 1.525880385422937e-08,
3722
+ "loss": 0.0064,
3723
+ "step": 4940
3724
+ },
3725
+ {
3726
+ "epoch": 1.3454743136721936,
3727
+ "grad_norm": 1.4360600709915161,
3728
+ "learning_rate": 1.0596790860812194e-08,
3729
+ "loss": 0.0098,
3730
+ "step": 4950
3731
+ },
3732
+ {
3733
+ "epoch": 1.3454743136721936,
3734
+ "eval_loss": 0.002407131949439645,
3735
+ "eval_runtime": 56.3377,
3736
+ "eval_samples_per_second": 8.893,
3737
+ "eval_steps_per_second": 8.893,
3738
+ "step": 4950
3739
  }
3740
  ],
3741
  "logging_steps": 10,
 
3755
  "attributes": {}
3756
  }
3757
  },
3758
+ "total_flos": 1.1468899731314442e+18,
3759
  "train_batch_size": 16,
3760
  "trial_name": null,
3761
  "trial_params": null