besimray commited on
Commit
77b516b
·
verified ·
1 Parent(s): 903ad2d

Training in progress, step 260, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef0fe17d2ab3da4040a3c7977b8c6f341f08d2b05df764b34ad276ac546fa5f1
3
  size 125048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c81d175e794ab238d63b2a692ce503c5c4dfef3174dfee2601e03d21ee7e7ff
3
  size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:347185a8a18472d85b591d6c60188bd66fadd6135ef8c34f34376e84d00d9080
3
  size 162868
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6588d9206907a6c00235f951ec1e76bb5c61306e975035df3959233feba0de44
3
  size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:472f6fb72bea35702df9d08ef02dd4970a6ff81a3410e842d1e8a245dcd7f271
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1be1448f42a3a082b7043ab2c191269d82518d2f41873081925d363ddc8352ea
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8626d0d5c57fc88c3d7f36cad001868cddc459f981786e9f1343c59e21cde7ac
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fac612e1de34a13e54762dd7927b1179494a688e77b41a80ece98fe382c45710
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 11.020323753356934,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-250",
4
- "epoch": 0.011299179679555264,
5
  "eval_steps": 5,
6
- "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2165,6 +2165,92 @@
2165
  "eval_samples_per_second": 52.839,
2166
  "eval_steps_per_second": 26.422,
2167
  "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2168
  }
2169
  ],
2170
  "logging_steps": 1,
@@ -2193,7 +2279,7 @@
2193
  "attributes": {}
2194
  }
2195
  },
2196
- "total_flos": 2626682880000.0,
2197
  "train_batch_size": 2,
2198
  "trial_name": null,
2199
  "trial_params": null
 
1
  {
2
+ "best_metric": 11.020062446594238,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-260",
4
+ "epoch": 0.011751146866737476,
5
  "eval_steps": 5,
6
+ "global_step": 260,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2165
  "eval_samples_per_second": 52.839,
2166
  "eval_steps_per_second": 26.422,
2167
  "step": 250
2168
+ },
2169
+ {
2170
+ "epoch": 0.011344376398273485,
2171
+ "grad_norm": 0.5418515801429749,
2172
+ "learning_rate": 0.00010256428432180956,
2173
+ "loss": 44.0602,
2174
+ "step": 251
2175
+ },
2176
+ {
2177
+ "epoch": 0.011389573116991706,
2178
+ "grad_norm": 0.45757991075515747,
2179
+ "learning_rate": 0.00010192330547876871,
2180
+ "loss": 44.0788,
2181
+ "step": 252
2182
+ },
2183
+ {
2184
+ "epoch": 0.011434769835709927,
2185
+ "grad_norm": 0.5210107564926147,
2186
+ "learning_rate": 0.00010128224757617274,
2187
+ "loss": 44.0517,
2188
+ "step": 253
2189
+ },
2190
+ {
2191
+ "epoch": 0.011479966554428149,
2192
+ "grad_norm": 0.39198753237724304,
2193
+ "learning_rate": 0.00010064113696540111,
2194
+ "loss": 44.0776,
2195
+ "step": 254
2196
+ },
2197
+ {
2198
+ "epoch": 0.01152516327314637,
2199
+ "grad_norm": 0.4305363893508911,
2200
+ "learning_rate": 0.0001,
2201
+ "loss": 44.1121,
2202
+ "step": 255
2203
+ },
2204
+ {
2205
+ "epoch": 0.01152516327314637,
2206
+ "eval_loss": 11.02021312713623,
2207
+ "eval_runtime": 176.1601,
2208
+ "eval_samples_per_second": 52.889,
2209
+ "eval_steps_per_second": 26.448,
2210
+ "step": 255
2211
+ },
2212
+ {
2213
+ "epoch": 0.011570359991864591,
2214
+ "grad_norm": 0.4909750521183014,
2215
+ "learning_rate": 9.93588630345989e-05,
2216
+ "loss": 44.0858,
2217
+ "step": 256
2218
+ },
2219
+ {
2220
+ "epoch": 0.011615556710582812,
2221
+ "grad_norm": 0.4016626477241516,
2222
+ "learning_rate": 9.871775242382727e-05,
2223
+ "loss": 44.0732,
2224
+ "step": 257
2225
+ },
2226
+ {
2227
+ "epoch": 0.011660753429301033,
2228
+ "grad_norm": 0.5827097296714783,
2229
+ "learning_rate": 9.80766945212313e-05,
2230
+ "loss": 44.0957,
2231
+ "step": 258
2232
+ },
2233
+ {
2234
+ "epoch": 0.011705950148019255,
2235
+ "grad_norm": 0.48728469014167786,
2236
+ "learning_rate": 9.743571567819046e-05,
2237
+ "loss": 44.0648,
2238
+ "step": 259
2239
+ },
2240
+ {
2241
+ "epoch": 0.011751146866737476,
2242
+ "grad_norm": 0.455342173576355,
2243
+ "learning_rate": 9.679484224283449e-05,
2244
+ "loss": 44.0327,
2245
+ "step": 260
2246
+ },
2247
+ {
2248
+ "epoch": 0.011751146866737476,
2249
+ "eval_loss": 11.020062446594238,
2250
+ "eval_runtime": 176.2853,
2251
+ "eval_samples_per_second": 52.852,
2252
+ "eval_steps_per_second": 26.429,
2253
+ "step": 260
2254
  }
2255
  ],
2256
  "logging_steps": 1,
 
2279
  "attributes": {}
2280
  }
2281
  },
2282
+ "total_flos": 2731750195200.0,
2283
  "train_batch_size": 2,
2284
  "trial_name": null,
2285
  "trial_params": null