farmery committed
Commit 55019ba · verified · parent 7111be6

Training in progress, step 263, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c6e5c39604ce8318809f0f58050866a16208e2d2602dead51d983b132e683fb
+oid sha256:a5b1af8c1d1044559e4566bbe8a9da01e3a7cd6a8b87d6052aba8332beb6fc41
 size 93608
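The files above are Git LFS pointers, so only the 64-character sha256 oid changes here; the actual weights live in LFS storage. A minimal sketch, assuming the real adapter_model.safetensors has been pulled locally (e.g. via git lfs pull), for checking that the downloaded blob matches the oid and size recorded in the new pointer:

import hashlib
import os

def verify_lfs_blob(blob_path, expected_oid, expected_size):
    # Hash the file in chunks so large checkpoints do not need to fit in memory.
    h = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_oid and os.path.getsize(blob_path) == expected_size

# Values taken from the updated pointer above.
print(verify_lfs_blob(
    "last-checkpoint/adapter_model.safetensors",
    "a5b1af8c1d1044559e4566bbe8a9da01e3a7cd6a8b87d6052aba8332beb6fc41",
    93608,
))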
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:64c40d1bc73f541b515340dac6469684a54f37cc18606adddfab612440adebee
+oid sha256:550fcf36ce2e98b0de4e1aeef16362ea975811edfde74052fd22cc5f18443aff
 size 197158
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae275df9131dadab3e84666c81acf7aef0f1e0d393e070db94afde55cc7a04cb
+oid sha256:2bdcfd957e9096fb21e3e831d470a6e7f9eb410847ee84b1c39e0af81ca448b3
 size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b7599f9359c4d25abeca594df3574d460ca4601b65af6b90fc9847ccb839f2fa
+oid sha256:e2d933dbe0de678b7fe46f2875c92f23e4a28843792efe278b61113788ec0702
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae756a5b4dca2f7ad8c44c4862c84851fc3ad00fdb24067fecc92d6f2b533dac
+oid sha256:5af588657011060d064f635dca4d0d830bcd43082eaae2d0fed1a5df4b6b5ddc
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:753fe59205deb0859689da31f17244e755cd7f229128f79e49d0491eec8ee8fa
+oid sha256:8e1c71efe8f09776760711a07fe747388a51628ea866272ad0a46951f43e370b
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d11cedd0890a662e2118d0364333043dfd399c79d0a1ea8d4a89bb7cbc6705ac
+oid sha256:a6c4c21ea8440b1f22c3f50a73abd0d48167dee8529fcaea1c9b7867aa3874fd
 size 1064
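optimizer.pt, scheduler.pt, and the four rng_state_*.pth files are the PyTorch-side state the Trainer saves next to the adapter so a run can resume deterministically across the four ranks. A rough inspection sketch, assuming torch and safetensors are installed and the checkpoint has been downloaded; the exact keys inside the state dicts depend on the optimizer and scheduler used, so treat the printed fields as illustrative:

import torch
from safetensors import safe_open

# Adapter weights: list tensor names and shapes without instantiating the model.
with safe_open("last-checkpoint/adapter_model.safetensors", framework="pt", device="cpu") as f:
    for name in f.keys():
        print(name, tuple(f.get_tensor(name).shape))

# Optimizer and LR-scheduler state dicts. weights_only=False is acceptable for
# trusted local files; drop the kwarg on older PyTorch versions that lack it.
opt_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu", weights_only=False)
sched_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)
print(opt_state["param_groups"][0].get("lr"))
print(sched_state.get("last_epoch"))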
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.763532763532764,
+  "epoch": 3.005698005698006,
   "eval_steps": 22,
-  "global_step": 242,
+  "global_step": 263,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -663,6 +663,55 @@
       "eval_samples_per_second": 407.478,
       "eval_steps_per_second": 13.122,
       "step": 242
+    },
+    {
+      "epoch": 2.774928774928775,
+      "grad_norm": 0.034080274403095245,
+      "learning_rate": 1.5340001884502574e-06,
+      "loss": 11.9085,
+      "step": 243
+    },
+    {
+      "epoch": 2.809116809116809,
+      "grad_norm": 0.02778993546962738,
+      "learning_rate": 1.1098984190808404e-06,
+      "loss": 11.909,
+      "step": 246
+    },
+    {
+      "epoch": 2.8433048433048436,
+      "grad_norm": 0.039922092109918594,
+      "learning_rate": 7.536344767570536e-07,
+      "loss": 11.9091,
+      "step": 249
+    },
+    {
+      "epoch": 2.8774928774928776,
+      "grad_norm": 0.0371006578207016,
+      "learning_rate": 4.6570269818346224e-07,
+      "loss": 11.9102,
+      "step": 252
+    },
+    {
+      "epoch": 2.9116809116809117,
+      "grad_norm": 0.03678058087825775,
+      "learning_rate": 2.46502605258464e-07,
+      "loss": 11.9108,
+      "step": 255
+    },
+    {
+      "epoch": 2.9458689458689458,
+      "grad_norm": 0.02875494584441185,
+      "learning_rate": 9.633835071463094e-08,
+      "loss": 11.9097,
+      "step": 258
+    },
+    {
+      "epoch": 2.98005698005698,
+      "grad_norm": 0.025189543142914772,
+      "learning_rate": 1.5418296089358963e-08,
+      "loss": 11.9096,
+      "step": 261
     }
   ],
   "logging_steps": 3,
@@ -677,12 +726,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 5281217249280.0,
+  "total_flos": 5733169889280.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null