alicegoesdown commited on
Commit
904d15a
·
verified ·
1 Parent(s): c874f55

Training in progress, step 3900, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afceb16db7095163c12ff254b125dec7a8e756afb939583097fe0c319218f266
3
  size 653434568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c78c7430cf5b9ef6212c236177c5a3a696974a0d15fdd8ede05ffbf0c3135612
3
  size 653434568
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:146922f6e085c00287f8a8d67d633666043fb31db7daade124fd99cfdfa9b74a
3
  size 1288533754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17539e5a9fb0d37a41d5d03dca6d304ee838df4bb8dd58d5b855815f2f50aafc
3
  size 1288533754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1950119940a674954a451de65d3b5b14146ef3decc11fa73ee90422e566f32f6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ed5d949cb12305b0310078c1beaef8015e2f1ca27ebf48ab7768e7ec8214eff
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a6a877a565e5ca765e02fcf7b35e1ef12c2842737d39f9a2daa9a2cc4aea1b9
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:782f4b11165c85795edd4383fdab33cd4f492f186c42efd14a7c5ea61341fe99
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.499360203742981,
3
- "best_model_checkpoint": "./output/checkpoint-3750",
4
- "epoch": 0.33250576343323285,
5
  "eval_steps": 150,
6
- "global_step": 3750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2832,6 +2832,119 @@
2832
  "eval_samples_per_second": 8.418,
2833
  "eval_steps_per_second": 8.418,
2834
  "step": 3750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2835
  }
2836
  ],
2837
  "logging_steps": 10,
@@ -2851,7 +2964,7 @@
2851
  "attributes": {}
2852
  }
2853
  },
2854
- "total_flos": 1.2625500121375703e+18,
2855
  "train_batch_size": 8,
2856
  "trial_name": null,
2857
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.4924039840698242,
3
+ "best_model_checkpoint": "./output/checkpoint-3900",
4
+ "epoch": 0.34580599397056216,
5
  "eval_steps": 150,
6
+ "global_step": 3900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2832
  "eval_samples_per_second": 8.418,
2833
  "eval_steps_per_second": 8.418,
2834
  "step": 3750
2835
+ },
2836
+ {
2837
+ "epoch": 0.3333924454690548,
2838
+ "grad_norm": 6.615363597869873,
2839
+ "learning_rate": 1.4986307228237271e-05,
2840
+ "loss": 1.4275,
2841
+ "step": 3760
2842
+ },
2843
+ {
2844
+ "epoch": 0.33427912750487676,
2845
+ "grad_norm": 10.221020698547363,
2846
+ "learning_rate": 1.4758181262419428e-05,
2847
+ "loss": 1.5383,
2848
+ "step": 3770
2849
+ },
2850
+ {
2851
+ "epoch": 0.3351658095406987,
2852
+ "grad_norm": 7.772680759429932,
2853
+ "learning_rate": 1.4531503949737111e-05,
2854
+ "loss": 1.4759,
2855
+ "step": 3780
2856
+ },
2857
+ {
2858
+ "epoch": 0.3360524915765207,
2859
+ "grad_norm": 12.315176963806152,
2860
+ "learning_rate": 1.4306284608006841e-05,
2861
+ "loss": 1.6371,
2862
+ "step": 3790
2863
+ },
2864
+ {
2865
+ "epoch": 0.3369391736123426,
2866
+ "grad_norm": 5.77493953704834,
2867
+ "learning_rate": 1.408253249511363e-05,
2868
+ "loss": 1.5273,
2869
+ "step": 3800
2870
+ },
2871
+ {
2872
+ "epoch": 0.3378258556481646,
2873
+ "grad_norm": 7.274715423583984,
2874
+ "learning_rate": 1.3860256808630431e-05,
2875
+ "loss": 1.5488,
2876
+ "step": 3810
2877
+ },
2878
+ {
2879
+ "epoch": 0.3387125376839865,
2880
+ "grad_norm": 5.848362922668457,
2881
+ "learning_rate": 1.3639466685440138e-05,
2882
+ "loss": 1.3308,
2883
+ "step": 3820
2884
+ },
2885
+ {
2886
+ "epoch": 0.3395992197198085,
2887
+ "grad_norm": 7.360718727111816,
2888
+ "learning_rate": 1.3420171201359936e-05,
2889
+ "loss": 1.3528,
2890
+ "step": 3830
2891
+ },
2892
+ {
2893
+ "epoch": 0.3404859017556304,
2894
+ "grad_norm": 7.868961334228516,
2895
+ "learning_rate": 1.3202379370768256e-05,
2896
+ "loss": 1.4614,
2897
+ "step": 3840
2898
+ },
2899
+ {
2900
+ "epoch": 0.3413725837914524,
2901
+ "grad_norm": 9.383559226989746,
2902
+ "learning_rate": 1.2986100146234235e-05,
2903
+ "loss": 1.4874,
2904
+ "step": 3850
2905
+ },
2906
+ {
2907
+ "epoch": 0.34225926582727434,
2908
+ "grad_norm": 11.13818645477295,
2909
+ "learning_rate": 1.277134241814966e-05,
2910
+ "loss": 1.3849,
2911
+ "step": 3860
2912
+ },
2913
+ {
2914
+ "epoch": 0.34314594786309627,
2915
+ "grad_norm": 10.342170715332031,
2916
+ "learning_rate": 1.2558115014363595e-05,
2917
+ "loss": 1.3115,
2918
+ "step": 3870
2919
+ },
2920
+ {
2921
+ "epoch": 0.34403262989891825,
2922
+ "grad_norm": 6.494439125061035,
2923
+ "learning_rate": 1.2346426699819462e-05,
2924
+ "loss": 1.4533,
2925
+ "step": 3880
2926
+ },
2927
+ {
2928
+ "epoch": 0.3449193119347402,
2929
+ "grad_norm": 9.364208221435547,
2930
+ "learning_rate": 1.2136286176194748e-05,
2931
+ "loss": 1.4229,
2932
+ "step": 3890
2933
+ },
2934
+ {
2935
+ "epoch": 0.34580599397056216,
2936
+ "grad_norm": 6.627419471740723,
2937
+ "learning_rate": 1.1927702081543282e-05,
2938
+ "loss": 1.2769,
2939
+ "step": 3900
2940
+ },
2941
+ {
2942
+ "epoch": 0.34580599397056216,
2943
+ "eval_loss": 1.4924039840698242,
2944
+ "eval_runtime": 59.4072,
2945
+ "eval_samples_per_second": 8.416,
2946
+ "eval_steps_per_second": 8.416,
2947
+ "step": 3900
2948
  }
2949
  ],
2950
  "logging_steps": 10,
 
2964
  "attributes": {}
2965
  }
2966
  },
2967
+ "total_flos": 1.3131363193066783e+18,
2968
  "train_batch_size": 8,
2969
  "trial_name": null,
2970
  "trial_params": null