besimray committed
Commit b4dd036 · verified · 1 Parent(s): 4f9ee58

Training in progress, step 360, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f2f0a76da24a1ad81dcda706248e253d52a191704ea53265b6d9465df517e2d
+oid sha256:3da1bc8e78d62a43f5fc08c54b7da66788aab90b181d1cce0dc0e44a0c7ead64
 size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:501e40adaa4f70964a8cd6a1dfdb25bf4ad0f9592e59902ac2cdb07292b6512f
+oid sha256:fa9f5ab26f7ae0259dbc2872ec134c30afd17bf2cb254c9c4159232de93a6a4c
 size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17a037725bec0a8d6f8f48eff9706f34e57cf5ea36d4c0fe1640235ccea83807
+oid sha256:63df2ca6f0708e85ea13f7c75a1ee020a6142b982449338d6d6d3a95d80f4533
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c35e24dc9e28d5479cc0116b5a139eadfaf05e8ad587dfecd6be3b09104334b7
+oid sha256:89431c5d37cecda04572e69a3e5ff0abc04241564413b5510718a518a469399c
 size 1064
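
The four binary files above are stored through Git LFS: each diff only swaps the sha256 object id in the pointer stub while the reported size stays the same, so the actual tensors live in LFS storage rather than in the Git history. A minimal sketch of resolving one of these pointers at this commit with huggingface_hub follows; the repo id is a placeholder, not something shown on this page.

# Minimal sketch: download the LFS-backed adapter weights as of commit b4dd036.
# "username/repo" is a placeholder repo id (not given on this page); hf_hub_download
# resolves the pointer to the 125048-byte safetensors blob referenced by the new oid.
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="username/repo",  # placeholder
    filename="last-checkpoint/adapter_model.safetensors",
    revision="b4dd036",
)
print(local_path)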
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.018669128417969,
-  "best_model_checkpoint": "miner_id_24/checkpoint-350",
-  "epoch": 0.01581885155137737,
+  "best_metric": 11.018574714660645,
+  "best_model_checkpoint": "miner_id_24/checkpoint-360",
+  "epoch": 0.01627081873855958,
   "eval_steps": 5,
-  "global_step": 350,
+  "global_step": 360,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3025,6 +3025,92 @@
       "eval_samples_per_second": 52.869,
       "eval_steps_per_second": 26.437,
       "step": 350
+    },
+    {
+      "epoch": 0.01586404827009559,
+      "grad_norm": 0.4974361062049866,
+      "learning_rate": 4.2263667111118074e-05,
+      "loss": 44.0836,
+      "step": 351
+    },
+    {
+      "epoch": 0.01590924498881381,
+      "grad_norm": 0.4839700162410736,
+      "learning_rate": 4.174137354896039e-05,
+      "loss": 44.0984,
+      "step": 352
+    },
+    {
+      "epoch": 0.015954441707532033,
+      "grad_norm": 0.4186987578868866,
+      "learning_rate": 4.12214747707527e-05,
+      "loss": 44.0672,
+      "step": 353
+    },
+    {
+      "epoch": 0.015999638426250254,
+      "grad_norm": 0.5234962701797485,
+      "learning_rate": 4.0703992147497425e-05,
+      "loss": 44.0376,
+      "step": 354
+    },
+    {
+      "epoch": 0.016044835144968475,
+      "grad_norm": 0.47532570362091064,
+      "learning_rate": 4.0188946950878404e-05,
+      "loss": 44.0386,
+      "step": 355
+    },
+    {
+      "epoch": 0.016044835144968475,
+      "eval_loss": 11.018640518188477,
+      "eval_runtime": 176.1029,
+      "eval_samples_per_second": 52.907,
+      "eval_steps_per_second": 26.456,
+      "step": 355
+    },
+    {
+      "epoch": 0.016090031863686696,
+      "grad_norm": 0.397630900144577,
+      "learning_rate": 3.9676360352386356e-05,
+      "loss": 44.1375,
+      "step": 356
+    },
+    {
+      "epoch": 0.016135228582404917,
+      "grad_norm": 0.530908465385437,
+      "learning_rate": 3.9166253422448686e-05,
+      "loss": 44.1015,
+      "step": 357
+    },
+    {
+      "epoch": 0.01618042530112314,
+      "grad_norm": 0.41138243675231934,
+      "learning_rate": 3.8658647129563364e-05,
+      "loss": 44.0516,
+      "step": 358
+    },
+    {
+      "epoch": 0.01622562201984136,
+      "grad_norm": 0.5258074402809143,
+      "learning_rate": 3.8153562339436855e-05,
+      "loss": 44.1157,
+      "step": 359
+    },
+    {
+      "epoch": 0.01627081873855958,
+      "grad_norm": 0.3948734402656555,
+      "learning_rate": 3.7651019814126654e-05,
+      "loss": 44.0478,
+      "step": 360
+    },
+    {
+      "epoch": 0.01627081873855958,
+      "eval_loss": 11.018574714660645,
+      "eval_runtime": 176.3307,
+      "eval_samples_per_second": 52.838,
+      "eval_steps_per_second": 26.422,
+      "step": 360
     }
   ],
   "logging_steps": 1,
@@ -3053,7 +3139,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3677356032000.0,
+  "total_flos": 3782423347200.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null