besimray committed
Commit 9cf4d11 · verified · 1 Parent(s): 35ccf5b

Training in progress, step 280, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:75a3e84096039afb527d22d691d180e109ca9921f708bdaa27632df4487a4260
+ oid sha256:a61b56978b2452570a76d51bb39ac908f59f374a2960576a7c171f54e7ff4a4b
  size 125048
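
Note: the block above is a Git LFS pointer file, not the adapter weights themselves; it records only a sha256 oid and a byte size. A minimal verification sketch (the helper name and local path are hypothetical, not part of this repo), assuming the binary file has already been downloaded:

import hashlib
import os

def verify_lfs_pointer(pointer_text: str, file_path: str) -> bool:
    # Parse the "key value" lines of a Git LFS pointer (version, oid, size).
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].split("sha256:", 1)[1]
    expected_size = int(fields["size"])
    # Cheap size check first, then hash the file contents.
    if os.path.getsize(file_path) != expected_size:
        return False
    h = hashlib.sha256()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_oid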
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bc194bba403caad9cbc17f9f6c4159d35cde33e5cba286cd96d11edced40608d
+ oid sha256:f05cd8f7fc6effc7eb3d50ca7c74768114cbda4c7a753cd908b737e12f3421cf
  size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b3991cb24901cdd4ded826a1eb99233632b9b31143f5465b97735bc74e1caa25
+ oid sha256:c42d8efe8b5044ea7806228bc26e27f1820a2789f8e060576ef9893082177ba2
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:765f5571460aced30b253ddf135511867127c526d96f703a3f7058177ad62b46
+ oid sha256:322d234cf66e33210ecc86dafc19f666ddaa73050355080bdffd03ad06871557
  size 1064
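
The small adapter_model.safetensors (about 125 KB) suggests these checkpoints store a PEFT adapter (e.g. LoRA) rather than full model weights. A minimal loading sketch (the base model id is a placeholder assumption; only the checkpoint directory name comes from this commit):

from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("BASE_MODEL_ID")  # placeholder, not recorded in this diff
model = PeftModel.from_pretrained(base, "last-checkpoint")    # loads adapter_model.safetensors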
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 11.01980209350586,
- "best_model_checkpoint": "miner_id_24/checkpoint-270",
- "epoch": 0.012203114053919686,
+ "best_metric": 11.01966381072998,
+ "best_model_checkpoint": "miner_id_24/checkpoint-280",
+ "epoch": 0.012655081241101896,
  "eval_steps": 5,
- "global_step": 270,
+ "global_step": 280,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2337,6 +2337,92 @@
  "eval_samples_per_second": 52.889,
  "eval_steps_per_second": 26.448,
  "step": 270
+ },
+ {
+ "epoch": 0.012248310772637907,
+ "grad_norm": 0.636551022529602,
+ "learning_rate": 8.975972042185687e-05,
+ "loss": 44.1385,
+ "step": 271
+ },
+ {
+ "epoch": 0.012293507491356128,
+ "grad_norm": 0.5031408071517944,
+ "learning_rate": 8.912216437226693e-05,
+ "loss": 44.1121,
+ "step": 272
+ },
+ {
+ "epoch": 0.01233870421007435,
+ "grad_norm": 0.49243634939193726,
+ "learning_rate": 8.848505546789408e-05,
+ "loss": 44.0864,
+ "step": 273
+ },
+ {
+ "epoch": 0.01238390092879257,
+ "grad_norm": 0.47308340668678284,
+ "learning_rate": 8.784841989778996e-05,
+ "loss": 44.0391,
+ "step": 274
+ },
+ {
+ "epoch": 0.012429097647510792,
+ "grad_norm": 0.43966105580329895,
+ "learning_rate": 8.721228383154939e-05,
+ "loss": 44.0969,
+ "step": 275
+ },
+ {
+ "epoch": 0.012429097647510792,
+ "eval_loss": 11.019760131835938,
+ "eval_runtime": 176.1857,
+ "eval_samples_per_second": 52.882,
+ "eval_steps_per_second": 26.444,
+ "step": 275
+ },
+ {
+ "epoch": 0.012474294366229011,
+ "grad_norm": 0.4853382706642151,
+ "learning_rate": 8.657667341823448e-05,
+ "loss": 44.079,
+ "step": 276
+ },
+ {
+ "epoch": 0.012519491084947232,
+ "grad_norm": 0.453819215297699,
+ "learning_rate": 8.594161478529974e-05,
+ "loss": 44.0371,
+ "step": 277
+ },
+ {
+ "epoch": 0.012564687803665453,
+ "grad_norm": 0.4855421483516693,
+ "learning_rate": 8.530713403751821e-05,
+ "loss": 44.0514,
+ "step": 278
+ },
+ {
+ "epoch": 0.012609884522383675,
+ "grad_norm": 0.49890294671058655,
+ "learning_rate": 8.46732572559084e-05,
+ "loss": 44.0561,
+ "step": 279
+ },
+ {
+ "epoch": 0.012655081241101896,
+ "grad_norm": 0.406686007976532,
+ "learning_rate": 8.404001049666211e-05,
+ "loss": 44.0746,
+ "step": 280
+ },
+ {
+ "epoch": 0.012655081241101896,
+ "eval_loss": 11.01966381072998,
+ "eval_runtime": 176.4032,
+ "eval_samples_per_second": 52.817,
+ "eval_steps_per_second": 26.411,
+ "step": 280
  }
  ],
  "logging_steps": 1,
@@ -2365,7 +2451,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2836817510400.0,
+ "total_flos": 2941884825600.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null