nifftypoo committed
Commit 430abcf · verified · 1 parent: 2a216a6

Training in progress, step 4050, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c28f68060874c7837e83165f737e86ddf90d548220a521be0b0f401eff33a5f1
+oid sha256:a466aa412b472a720496b36b896a897f1a7cdb641a5aeae5c97fa00d6c450dff
 size 1077970296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2914614d31a57cd505cfbcf54c7bec1d78c350646af8f0f24ddd5ca0f7093430
-size 2156088250
+oid sha256:968ca9cf4084a5ba35e9eb43968fd4f30479f57a9643de3be51fbbf489de7fbb
+size 2156088186
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8fae47426850d84262c0c714354b1aaa92524f7f42fad82a7f80fc3312154556
+oid sha256:563a766f82e35a43b37668eed15adc7521ade53f614302ba0d81b3d046d0f375
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74cfdf7ac09b0e2d6096c5ed33aeb70fe2b41a0b29dfa76982b8f8b0aa2c371b
+oid sha256:286a22d17c4ec91464253232b9604147b9c42456481200a21ca66148fbcc21f3
 size 1256
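
The four pointer diffs above only swap the SHA-256 object id (and, for optimizer.pt, the byte size) that Git LFS uses to locate the real checkpoint file; adapter_model.safetensors keeps the same size, so only its hash changed. As a minimal sketch, assuming the large objects have already been pulled locally, the new pointers can be checked against the downloaded files like this (the `verify_lfs_object` helper is illustrative, not part of this repository):

```python
import hashlib
from pathlib import Path

def verify_lfs_object(local_file: str, expected_oid: str, expected_size: int) -> bool:
    """Check a local file against the oid/size recorded in its Git LFS pointer."""
    path = Path(local_file)
    if path.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        # Hash in 1 MiB chunks so multi-GB checkpoints are not loaded into memory at once.
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values taken from the updated scheduler.pt pointer above.
print(verify_lfs_object(
    "last-checkpoint/scheduler.pt",
    "286a22d17c4ec91464253232b9604147b9c42456481200a21ca66148fbcc21f3",
    1256,
))
```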
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.0137948989868164,
   "best_model_checkpoint": "./output/checkpoint-2850",
-  "epoch": 3.6757775683317626,
+  "epoch": 3.817153628652215,
   "eval_steps": 150,
-  "global_step": 3900,
+  "global_step": 4050,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2945,6 +2945,119 @@
       "eval_samples_per_second": 9.917,
       "eval_steps_per_second": 9.917,
       "step": 3900
+    },
+    {
+      "epoch": 3.6852026390197925,
+      "grad_norm": 5.458437919616699,
+      "learning_rate": 1.6425064001023903e-06,
+      "loss": 0.2519,
+      "step": 3910
+    },
+    {
+      "epoch": 3.694627709707823,
+      "grad_norm": 8.376575469970703,
+      "learning_rate": 1.6424547130739113e-06,
+      "loss": 0.2318,
+      "step": 3920
+    },
+    {
+      "epoch": 3.7040527803958527,
+      "grad_norm": 6.276689052581787,
+      "learning_rate": 1.6423685704360984e-06,
+      "loss": 0.2489,
+      "step": 3930
+    },
+    {
+      "epoch": 3.713477851083883,
+      "grad_norm": 9.211997985839844,
+      "learning_rate": 1.6422479758033246e-06,
+      "loss": 0.2881,
+      "step": 3940
+    },
+    {
+      "epoch": 3.7229029217719134,
+      "grad_norm": 10.033238410949707,
+      "learning_rate": 1.642092934235499e-06,
+      "loss": 0.2683,
+      "step": 3950
+    },
+    {
+      "epoch": 3.7323279924599433,
+      "grad_norm": 8.193641662597656,
+      "learning_rate": 1.6419034522378548e-06,
+      "loss": 0.2373,
+      "step": 3960
+    },
+    {
+      "epoch": 3.7417530631479736,
+      "grad_norm": 12.219656944274902,
+      "learning_rate": 1.641679537760678e-06,
+      "loss": 0.2695,
+      "step": 3970
+    },
+    {
+      "epoch": 3.751178133836004,
+      "grad_norm": 7.339855670928955,
+      "learning_rate": 1.6414212001989711e-06,
+      "loss": 0.2652,
+      "step": 3980
+    },
+    {
+      "epoch": 3.760603204524034,
+      "grad_norm": 7.646217346191406,
+      "learning_rate": 1.641128450392061e-06,
+      "loss": 0.2662,
+      "step": 3990
+    },
+    {
+      "epoch": 3.770028275212064,
+      "grad_norm": 8.313796997070312,
+      "learning_rate": 1.640801300623143e-06,
+      "loss": 0.2143,
+      "step": 4000
+    },
+    {
+      "epoch": 3.7794533459000945,
+      "grad_norm": 6.259695053100586,
+      "learning_rate": 1.6404397646187665e-06,
+      "loss": 0.2267,
+      "step": 4010
+    },
+    {
+      "epoch": 3.7888784165881244,
+      "grad_norm": 6.719018459320068,
+      "learning_rate": 1.6400438575482577e-06,
+      "loss": 0.2349,
+      "step": 4020
+    },
+    {
+      "epoch": 3.7983034872761543,
+      "grad_norm": 6.6731109619140625,
+      "learning_rate": 1.6396135960230847e-06,
+      "loss": 0.2621,
+      "step": 4030
+    },
+    {
+      "epoch": 3.8077285579641846,
+      "grad_norm": 10.587440490722656,
+      "learning_rate": 1.6391489980961598e-06,
+      "loss": 0.2612,
+      "step": 4040
+    },
+    {
+      "epoch": 3.817153628652215,
+      "grad_norm": 7.596518039703369,
+      "learning_rate": 1.6386500832610807e-06,
+      "loss": 0.2066,
+      "step": 4050
+    },
+    {
+      "epoch": 3.817153628652215,
+      "eval_loss": 1.0363526344299316,
+      "eval_runtime": 52.2082,
+      "eval_samples_per_second": 9.577,
+      "eval_steps_per_second": 9.577,
+      "step": 4050
     }
   ],
   "logging_steps": 10,
@@ -2964,7 +3077,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.889274255304294e+16,
+  "total_flos": 6.104925105153638e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null