besimray commited on
Commit
10a91cb
·
verified ·
1 Parent(s): 7d5b58a

Training in progress, step 270, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c81d175e794ab238d63b2a692ce503c5c4dfef3174dfee2601e03d21ee7e7ff
3
  size 125048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75a3e84096039afb527d22d691d180e109ca9921f708bdaa27632df4487a4260
3
  size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6588d9206907a6c00235f951ec1e76bb5c61306e975035df3959233feba0de44
3
  size 162868
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc194bba403caad9cbc17f9f6c4159d35cde33e5cba286cd96d11edced40608d
3
  size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1be1448f42a3a082b7043ab2c191269d82518d2f41873081925d363ddc8352ea
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3991cb24901cdd4ded826a1eb99233632b9b31143f5465b97735bc74e1caa25
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fac612e1de34a13e54762dd7927b1179494a688e77b41a80ece98fe382c45710
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:765f5571460aced30b253ddf135511867127c526d96f703a3f7058177ad62b46
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 11.020062446594238,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-260",
4
- "epoch": 0.011751146866737476,
5
  "eval_steps": 5,
6
- "global_step": 260,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2251,6 +2251,92 @@
2251
  "eval_samples_per_second": 52.852,
2252
  "eval_steps_per_second": 26.429,
2253
  "step": 260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2254
  }
2255
  ],
2256
  "logging_steps": 1,
@@ -2279,7 +2365,7 @@
2279
  "attributes": {}
2280
  }
2281
  },
2282
- "total_flos": 2731750195200.0,
2283
  "train_batch_size": 2,
2284
  "trial_name": null,
2285
  "trial_params": null
 
1
  {
2
+ "best_metric": 11.01980209350586,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-270",
4
+ "epoch": 0.012203114053919686,
5
  "eval_steps": 5,
6
+ "global_step": 270,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2251
  "eval_samples_per_second": 52.852,
2252
  "eval_steps_per_second": 26.429,
2253
  "step": 260
2254
+ },
2255
+ {
2256
+ "epoch": 0.011796343585455695,
2257
+ "grad_norm": 0.50531005859375,
2258
+ "learning_rate": 9.615410055896015e-05,
2259
+ "loss": 44.0094,
2260
+ "step": 261
2261
+ },
2262
+ {
2263
+ "epoch": 0.011841540304173916,
2264
+ "grad_norm": 0.6205224990844727,
2265
+ "learning_rate": 9.551351696494854e-05,
2266
+ "loss": 44.1,
2267
+ "step": 262
2268
+ },
2269
+ {
2270
+ "epoch": 0.011886737022892137,
2271
+ "grad_norm": 0.5274375081062317,
2272
+ "learning_rate": 9.48731177926821e-05,
2273
+ "loss": 44.1223,
2274
+ "step": 263
2275
+ },
2276
+ {
2277
+ "epoch": 0.011931933741610359,
2278
+ "grad_norm": 0.5149595141410828,
2279
+ "learning_rate": 9.423292936646257e-05,
2280
+ "loss": 44.1192,
2281
+ "step": 264
2282
+ },
2283
+ {
2284
+ "epoch": 0.01197713046032858,
2285
+ "grad_norm": 0.5359209179878235,
2286
+ "learning_rate": 9.359297800192872e-05,
2287
+ "loss": 44.1155,
2288
+ "step": 265
2289
+ },
2290
+ {
2291
+ "epoch": 0.01197713046032858,
2292
+ "eval_loss": 11.019892692565918,
2293
+ "eval_runtime": 176.1866,
2294
+ "eval_samples_per_second": 52.881,
2295
+ "eval_steps_per_second": 26.444,
2296
+ "step": 265
2297
+ },
2298
+ {
2299
+ "epoch": 0.012022327179046801,
2300
+ "grad_norm": 0.5752252340316772,
2301
+ "learning_rate": 9.29532900049746e-05,
2302
+ "loss": 44.0821,
2303
+ "step": 266
2304
+ },
2305
+ {
2306
+ "epoch": 0.012067523897765022,
2307
+ "grad_norm": 0.5125178098678589,
2308
+ "learning_rate": 9.231389167066837e-05,
2309
+ "loss": 44.061,
2310
+ "step": 267
2311
+ },
2312
+ {
2313
+ "epoch": 0.012112720616483243,
2314
+ "grad_norm": 0.5295204520225525,
2315
+ "learning_rate": 9.167480928217108e-05,
2316
+ "loss": 43.9889,
2317
+ "step": 268
2318
+ },
2319
+ {
2320
+ "epoch": 0.012157917335201465,
2321
+ "grad_norm": 0.40016570687294006,
2322
+ "learning_rate": 9.103606910965666e-05,
2323
+ "loss": 44.0684,
2324
+ "step": 269
2325
+ },
2326
+ {
2327
+ "epoch": 0.012203114053919686,
2328
+ "grad_norm": 0.42660149931907654,
2329
+ "learning_rate": 9.039769740923183e-05,
2330
+ "loss": 44.0547,
2331
+ "step": 270
2332
+ },
2333
+ {
2334
+ "epoch": 0.012203114053919686,
2335
+ "eval_loss": 11.01980209350586,
2336
+ "eval_runtime": 176.1599,
2337
+ "eval_samples_per_second": 52.889,
2338
+ "eval_steps_per_second": 26.448,
2339
+ "step": 270
2340
  }
2341
  ],
2342
  "logging_steps": 1,
 
2365
  "attributes": {}
2366
  }
2367
  },
2368
+ "total_flos": 2836817510400.0,
2369
  "train_batch_size": 2,
2370
  "trial_name": null,
2371
  "trial_params": null