alicegoesdown committed
Commit b01b1bc · verified · 1 Parent(s): 7435ff4

Training in progress, step 3900, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c66c562a5853f6fb7fbdca9387ecc49bd018ff283301b7a1084d91a2cdb51062
+oid sha256:09aad91489281aa46d111f68fded42b77aab4c9128cec9984635fd689a01e3fb
 size 39131224
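
Each binary file in this checkpoint is tracked with Git LFS, so the diff above only replaces the pointer's oid sha256 digest while the recorded size stays the same. Below is a minimal sketch (not part of this repository; the file paths are hypothetical) of how such a pointer can be parsed and a downloaded object verified against it, assuming Python 3.9+:

import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_text: str) -> dict:
    # A Git LFS pointer is three "key value" lines: version, oid, size.
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_lfs_object(pointer_path: str, object_path: str) -> bool:
    # True if the local object matches the size and SHA-256 recorded in the pointer.
    fields = parse_lfs_pointer(Path(pointer_path).read_text())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    data = Path(object_path).read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid

# Example with hypothetical local paths:
# verify_lfs_object("adapter_model.safetensors.pointer", "adapter_model.safetensors")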
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5fceb2f46f3daf2430a6b0e34d7b2ff2bdec2304fd1d323e96cd214475f30db1
+oid sha256:e1532bb48cd00bccff557e8256c5a172e9ae72064bce13296fde1d1d4f71114d
 size 78504766
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e74313bf6c4504de4d8880cc78df70080bc0d75747aee5cbb39f9abd0bf109ae
+oid sha256:87dc5632ebf3c5289ed0c68a963f97e38fc4f56383d0e0add7df2752b51604ba
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:607202c5623a0d7c32f1c23cdb843f56835e006025f9cf66875098e00a1be2b7
+oid sha256:6952ca8c366d31ce68c6f3aa858eaa30f20b96aa4ac7725ee518bac50e5f06f7
 size 1256
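
optimizer.pt, rng_state.pth, and scheduler.pt hold the optimizer, random-number-generator, and learning-rate-scheduler state that allow training to resume exactly at step 3900. A rough sketch of how these pieces are reloaded, assuming PyTorch and a local copy of last-checkpoint/ (the concrete optimizer and scheduler objects for this run are not shown in the commit):

import torch

# weights_only=False is needed on recent PyTorch versions because these files
# contain pickled state dictionaries, not bare tensors.
optimizer_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu", weights_only=False)
scheduler_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load("last-checkpoint/rng_state.pth", map_location="cpu", weights_only=False)

# With real optimizer/scheduler objects these would be restored via
# optimizer.load_state_dict(optimizer_state) and scheduler.load_state_dict(scheduler_state);
# transformers' Trainer performs all of this automatically when training is resumed with
# trainer.train(resume_from_checkpoint="last-checkpoint").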
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.5047594904899597,
   "best_model_checkpoint": "./output/checkpoint-3000",
-  "epoch": 7.129277566539924,
+  "epoch": 7.414448669201521,
   "eval_steps": 150,
-  "global_step": 3750,
+  "global_step": 3900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2832,6 +2832,119 @@
       "eval_samples_per_second": 9.992,
       "eval_steps_per_second": 9.992,
       "step": 3750
+    },
+    {
+      "epoch": 7.14828897338403,
+      "grad_norm": 0.4247870147228241,
+      "learning_rate": 1.8732884035296582e-05,
+      "loss": 0.0146,
+      "step": 3760
+    },
+    {
+      "epoch": 7.167300380228137,
+      "grad_norm": 0.7434565424919128,
+      "learning_rate": 1.844772657802428e-05,
+      "loss": 0.0192,
+      "step": 3770
+    },
+    {
+      "epoch": 7.186311787072244,
+      "grad_norm": 1.7033151388168335,
+      "learning_rate": 1.8164379937171382e-05,
+      "loss": 0.0192,
+      "step": 3780
+    },
+    {
+      "epoch": 7.20532319391635,
+      "grad_norm": 1.1659843921661377,
+      "learning_rate": 1.7882855760008547e-05,
+      "loss": 0.0185,
+      "step": 3790
+    },
+    {
+      "epoch": 7.224334600760456,
+      "grad_norm": 0.6887686848640442,
+      "learning_rate": 1.760316561889203e-05,
+      "loss": 0.0165,
+      "step": 3800
+    },
+    {
+      "epoch": 7.243346007604563,
+      "grad_norm": 0.6986510753631592,
+      "learning_rate": 1.7325321010788034e-05,
+      "loss": 0.0247,
+      "step": 3810
+    },
+    {
+      "epoch": 7.262357414448669,
+      "grad_norm": 0.3889031410217285,
+      "learning_rate": 1.7049333356800167e-05,
+      "loss": 0.0155,
+      "step": 3820
+    },
+    {
+      "epoch": 7.281368821292776,
+      "grad_norm": 0.5668617486953735,
+      "learning_rate": 1.6775214001699914e-05,
+      "loss": 0.014,
+      "step": 3830
+    },
+    {
+      "epoch": 7.300380228136882,
+      "grad_norm": 0.600253164768219,
+      "learning_rate": 1.6502974213460316e-05,
+      "loss": 0.0172,
+      "step": 3840
+    },
+    {
+      "epoch": 7.319391634980988,
+      "grad_norm": 3.6719555854797363,
+      "learning_rate": 1.623262518279279e-05,
+      "loss": 0.0188,
+      "step": 3850
+    },
+    {
+      "epoch": 7.338403041825095,
+      "grad_norm": 0.4451853334903717,
+      "learning_rate": 1.596417802268707e-05,
+      "loss": 0.0176,
+      "step": 3860
+    },
+    {
+      "epoch": 7.357414448669202,
+      "grad_norm": 0.7403327822685242,
+      "learning_rate": 1.5697643767954488e-05,
+      "loss": 0.0201,
+      "step": 3870
+    },
+    {
+      "epoch": 7.3764258555133075,
+      "grad_norm": 0.43177762627601624,
+      "learning_rate": 1.543303337477432e-05,
+      "loss": 0.0158,
+      "step": 3880
+    },
+    {
+      "epoch": 7.395437262357414,
+      "grad_norm": 0.5685768127441406,
+      "learning_rate": 1.517035772024343e-05,
+      "loss": 0.0158,
+      "step": 3890
+    },
+    {
+      "epoch": 7.414448669201521,
+      "grad_norm": 0.24146628379821777,
+      "learning_rate": 1.49096276019291e-05,
+      "loss": 0.0212,
+      "step": 3900
+    },
+    {
+      "epoch": 7.414448669201521,
+      "eval_loss": 0.5245772004127502,
+      "eval_runtime": 47.7948,
+      "eval_samples_per_second": 9.792,
+      "eval_steps_per_second": 9.792,
+      "step": 3900
     }
   ],
   "logging_steps": 10,
@@ -2851,7 +2964,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3826482960319488.0,
+  "total_flos": 3979633349551104.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null