besimray committed
Commit 1f65841 · verified · 1 Parent(s): 75156c2

Training in progress, step 350, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8791fae23023cf2106e0168a68fe7cfc76cf5009e3729f4af35acf290bf1f97
+oid sha256:6f2f0a76da24a1ad81dcda706248e253d52a191704ea53265b6d9465df517e2d
 size 125048
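
The adapter_model.safetensors pointer above refers to the updated LoRA adapter weights (about 125 KB). A minimal sketch for inspecting what the new blob contains, assuming the checkpoint directory has been pulled locally under last-checkpoint/ (path taken from this repo's layout) and that the safetensors package is installed; nothing else here comes from the commit itself:

from safetensors import safe_open

# Assumed local path mirroring the repo layout in this commit.
ADAPTER_PATH = "last-checkpoint/adapter_model.safetensors"

# Lazily open the safetensors file and list the adapter tensors it stores.
with safe_open(ADAPTER_PATH, framework="pt") as f:
    for name in f.keys():
        tensor = f.get_tensor(name)
        print(name, tuple(tensor.shape), str(tensor.dtype))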
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:715153c301e34bc9e1ba77f1996ccb2e9a6de47d2fddaab7f57690744eaa6ccb
+oid sha256:501e40adaa4f70964a8cd6a1dfdb25bf4ad0f9592e59902ac2cdb07292b6512f
 size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e92355acea1d637dfee0848ce97e0a35db7f2918dda84a5212f2ddf67f03a2c
+oid sha256:17a037725bec0a8d6f8f48eff9706f34e57cf5ea36d4c0fe1640235ccea83807
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc3b597e507c818a1b809c10b26354c2c68ff2bc801d315b5aadc95eb0081237
+oid sha256:c35e24dc9e28d5479cc0116b5a139eadfaf05e8ad587dfecd6be3b09104334b7
 size 1064
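
All four binary checkpoint files in this commit are stored through Git LFS, so the diffs above only show the pointer files: the spec version, the SHA-256 oid of the blob, and its size in bytes. A small sketch, assuming the real blobs have already been fetched locally into last-checkpoint/, that recomputes each file's SHA-256 and compares it against the new oids from the pointers above:

import hashlib

# New oids copied from the LFS pointer diffs in this commit; local paths are assumed.
EXPECTED = {
    "last-checkpoint/adapter_model.safetensors":
        "6f2f0a76da24a1ad81dcda706248e253d52a191704ea53265b6d9465df517e2d",
    "last-checkpoint/optimizer.pt":
        "501e40adaa4f70964a8cd6a1dfdb25bf4ad0f9592e59902ac2cdb07292b6512f",
    "last-checkpoint/rng_state.pth":
        "17a037725bec0a8d6f8f48eff9706f34e57cf5ea36d4c0fe1640235ccea83807",
    "last-checkpoint/scheduler.pt":
        "c35e24dc9e28d5479cc0116b5a139eadfaf05e8ad587dfecd6be3b09104334b7",
}

def sha256_of(path: str) -> str:
    """Stream the file and return its hex SHA-256 digest."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

for path, oid in EXPECTED.items():
    status = "OK" if sha256_of(path) == oid else "MISMATCH"
    print(f"{status}  {path}")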
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.018733024597168,
-  "best_model_checkpoint": "miner_id_24/checkpoint-340",
-  "epoch": 0.01536688436419516,
+  "best_metric": 11.018669128417969,
+  "best_model_checkpoint": "miner_id_24/checkpoint-350",
+  "epoch": 0.01581885155137737,
   "eval_steps": 5,
-  "global_step": 340,
+  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2939,6 +2939,92 @@
       "eval_samples_per_second": 52.85,
       "eval_steps_per_second": 26.428,
       "step": 340
+    },
+    {
+      "epoch": 0.01541208108291338,
+      "grad_norm": 0.46714112162590027,
+      "learning_rate": 4.7613544987293446e-05,
+      "loss": 44.007,
+      "step": 341
+    },
+    {
+      "epoch": 0.015457277801631602,
+      "grad_norm": 0.449789434671402,
+      "learning_rate": 4.706850020762126e-05,
+      "loss": 44.0599,
+      "step": 342
+    },
+    {
+      "epoch": 0.015502474520349823,
+      "grad_norm": 0.5278406739234924,
+      "learning_rate": 4.6525631234587034e-05,
+      "loss": 44.0606,
+      "step": 343
+    },
+    {
+      "epoch": 0.015547671239068044,
+      "grad_norm": 0.5856757760047913,
+      "learning_rate": 4.5984960383408005e-05,
+      "loss": 44.0862,
+      "step": 344
+    },
+    {
+      "epoch": 0.015592867957786265,
+      "grad_norm": 0.48914504051208496,
+      "learning_rate": 4.544650987894514e-05,
+      "loss": 44.0642,
+      "step": 345
+    },
+    {
+      "epoch": 0.015592867957786265,
+      "eval_loss": 11.018689155578613,
+      "eval_runtime": 176.1852,
+      "eval_samples_per_second": 52.882,
+      "eval_steps_per_second": 26.444,
+      "step": 345
+    },
+    {
+      "epoch": 0.015638064676504485,
+      "grad_norm": 0.5346770882606506,
+      "learning_rate": 4.491030185478976e-05,
+      "loss": 44.122,
+      "step": 346
+    },
+    {
+      "epoch": 0.015683261395222706,
+      "grad_norm": 0.4303387701511383,
+      "learning_rate": 4.437635835235353e-05,
+      "loss": 44.0754,
+      "step": 347
+    },
+    {
+      "epoch": 0.015728458113940927,
+      "grad_norm": 0.3995809555053711,
+      "learning_rate": 4.384470131996252e-05,
+      "loss": 44.1039,
+      "step": 348
+    },
+    {
+      "epoch": 0.015773654832659148,
+      "grad_norm": 0.44882121682167053,
+      "learning_rate": 4.331535261195504e-05,
+      "loss": 44.1023,
+      "step": 349
+    },
+    {
+      "epoch": 0.01581885155137737,
+      "grad_norm": 0.4910334646701813,
+      "learning_rate": 4.278833398778306e-05,
+      "loss": 44.0906,
+      "step": 350
+    },
+    {
+      "epoch": 0.01581885155137737,
+      "eval_loss": 11.018669128417969,
+      "eval_runtime": 176.2273,
+      "eval_samples_per_second": 52.869,
+      "eval_steps_per_second": 26.437,
+      "step": 350
     }
   ],
   "logging_steps": 1,
@@ -2967,7 +3053,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3572288716800.0,
+  "total_flos": 3677356032000.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null