besimray commited on
Commit
8e8f145
·
verified ·
1 Parent(s): 8e463af

Training in progress, step 60, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a325469530c5116393c590cef5706d3eee407287829ff8aba0243257c6a3e152
3
  size 90207248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62f05558c21d49cb9e7b7542371d3bd7e5fa7c4e23a524cb4a2e2d951ce767b7
3
  size 90207248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22a1ea3e95e617f44c829178caeeb753eb9447db9a8173560f8f0ca712a74448
3
  size 46057082
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b111a7ec6a458128370132327494eb113dae7f9904096d27da271d0689cf8b08
3
  size 46057082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:478569fdd6c2c85cc9a9d4f3f42aedfcff08e9b9c0032e129370e035f8798baf
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35155c640e8cacf8f819b6b10fb25c906257dbef0b22d3a1ba399ce4e53aa194
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfc2bf0eccc6c4e85c949c664a83bcd160767da77920eebf352a6f7f7c4c9b2e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54b996514a941dd419a3f7869454171b960cb51cf1b91d9b10dbdcf1b1e50a10
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.8324581384658813,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
- "epoch": 0.11933174224343675,
5
  "eval_steps": 5,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -445,6 +445,92 @@
445
  "eval_samples_per_second": 6.724,
446
  "eval_steps_per_second": 3.381,
447
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
  }
449
  ],
450
  "logging_steps": 1,
@@ -473,7 +559,7 @@
473
  "attributes": {}
474
  }
475
  },
476
- "total_flos": 9788035851878400.0,
477
  "train_batch_size": 2,
478
  "trial_name": null,
479
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8281893730163574,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-60",
4
+ "epoch": 0.1431980906921241,
5
  "eval_steps": 5,
6
+ "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
445
  "eval_samples_per_second": 6.724,
446
  "eval_steps_per_second": 3.381,
447
  "step": 50
448
+ },
449
+ {
450
+ "epoch": 0.12171837708830549,
451
+ "grad_norm": 0.35318946838378906,
452
+ "learning_rate": 0.00019656487088855592,
453
+ "loss": 1.033,
454
+ "step": 51
455
+ },
456
+ {
457
+ "epoch": 0.12410501193317422,
458
+ "grad_norm": 0.30267930030822754,
459
+ "learning_rate": 0.00019639628606958533,
460
+ "loss": 0.6305,
461
+ "step": 52
462
+ },
463
+ {
464
+ "epoch": 0.12649164677804295,
465
+ "grad_norm": 0.30554893612861633,
466
+ "learning_rate": 0.0001962237387768529,
467
+ "loss": 0.7148,
468
+ "step": 53
469
+ },
470
+ {
471
+ "epoch": 0.1288782816229117,
472
+ "grad_norm": 0.3179020583629608,
473
+ "learning_rate": 0.00019604723610310194,
474
+ "loss": 0.6932,
475
+ "step": 54
476
+ },
477
+ {
478
+ "epoch": 0.13126491646778043,
479
+ "grad_norm": 0.43141505122184753,
480
+ "learning_rate": 0.00019586678530366606,
481
+ "loss": 0.9695,
482
+ "step": 55
483
+ },
484
+ {
485
+ "epoch": 0.13126491646778043,
486
+ "eval_loss": 0.8304810523986816,
487
+ "eval_runtime": 26.2933,
488
+ "eval_samples_per_second": 6.732,
489
+ "eval_steps_per_second": 3.385,
490
+ "step": 55
491
+ },
492
+ {
493
+ "epoch": 0.13365155131264916,
494
+ "grad_norm": 0.37711283564567566,
495
+ "learning_rate": 0.00019568239379617088,
496
+ "loss": 1.0899,
497
+ "step": 56
498
+ },
499
+ {
500
+ "epoch": 0.1360381861575179,
501
+ "grad_norm": 0.3021875023841858,
502
+ "learning_rate": 0.00019549406916022905,
503
+ "loss": 0.5727,
504
+ "step": 57
505
+ },
506
+ {
507
+ "epoch": 0.13842482100238662,
508
+ "grad_norm": 0.4609815180301666,
509
+ "learning_rate": 0.00019530181913712872,
510
+ "loss": 0.8931,
511
+ "step": 58
512
+ },
513
+ {
514
+ "epoch": 0.14081145584725538,
515
+ "grad_norm": 0.410404235124588,
516
+ "learning_rate": 0.00019510565162951537,
517
+ "loss": 0.9462,
518
+ "step": 59
519
+ },
520
+ {
521
+ "epoch": 0.1431980906921241,
522
+ "grad_norm": 0.3426823318004608,
523
+ "learning_rate": 0.00019490557470106686,
524
+ "loss": 0.767,
525
+ "step": 60
526
+ },
527
+ {
528
+ "epoch": 0.1431980906921241,
529
+ "eval_loss": 0.8281893730163574,
530
+ "eval_runtime": 26.309,
531
+ "eval_samples_per_second": 6.728,
532
+ "eval_steps_per_second": 3.383,
533
+ "step": 60
534
  }
535
  ],
536
  "logging_steps": 1,
 
559
  "attributes": {}
560
  }
561
  },
562
+ "total_flos": 1.174564302225408e+16,
563
  "train_batch_size": 2,
564
  "trial_name": null,
565
  "trial_params": null