farmery commited on
Commit
5faee7e
·
verified ·
1 Parent(s): 1cdf5a1

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edbe32a76c5eeb2f8ad91d32a12ce98652fe7a185a61b4fdd0cee38e13206aa7
3
  size 13587864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b271712254e01f3a756f80c9d91649a68f589f8083c68e28efa6bf81649af5f
3
  size 13587864
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd5421d26883dfe758f02f7809e34d4af60d4cbcb6c1a9a39dd2122798acf5f5
3
  size 27273018
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d70e0c506c43b6dd7bfc52aa23ed7049ddf97443e93d45fefb733a105c2b0c8
3
  size 27273018
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d41027b1107f70688b49e11ac3acba1ad49990a97dde70368c847788acbafa22
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4fea74c51f4405caaaae971a4ccfc94a699f62e8257c2d08259a26bff3fc4eb
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da3caf01e5476197ff734fa4d21d7a64cc29034ee82407e7813c64a66980430e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8287c8e13a42f467f666595aef17b0751f60576eb55d01ba59d28223687d2973
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7516e1415c24b35958baff6e9c4d3edca58fd3a76104c481f6630885714cc2a7
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b840cecfb7dec70c76a70327d64f5f23b2d66b0b6a1f665ecb2134abed6ccdbb
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ee98dbbdbada8bae41a60d283e699a374fc4ccfdefd92d90ca37436383d0d30
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:841574a58757d90b95116e1fd5d2951f131533acbad72a77bcd1a82810ab44b9
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c02a460177c98dec6f7d22d0a863ffd20f73da06cb1991cfe95edd9fb85df0db
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b421b30093a741efda571bdeb6770d480a0573a9ee33dd469ec29cc944e02e2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.2425832748413086,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
- "epoch": 0.6517311608961304,
5
  "eval_steps": 25,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -381,6 +381,372 @@
381
  "eval_samples_per_second": 178.69,
382
  "eval_steps_per_second": 46.46,
383
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  }
385
  ],
386
  "logging_steps": 1,
@@ -409,7 +775,7 @@
409
  "attributes": {}
410
  }
411
  },
412
- "total_flos": 2.22140137734144e+16,
413
  "train_batch_size": 1,
414
  "trial_name": null,
415
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.9786142110824585,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 1.3034623217922607,
5
  "eval_steps": 25,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
381
  "eval_samples_per_second": 178.69,
382
  "eval_steps_per_second": 46.46,
383
  "step": 50
384
+ },
385
+ {
386
+ "epoch": 0.664765784114053,
387
+ "grad_norm": 1.1472358703613281,
388
+ "learning_rate": 0.00023970785914785144,
389
+ "loss": 2.2791,
390
+ "step": 51
391
+ },
392
+ {
393
+ "epoch": 0.6778004073319756,
394
+ "grad_norm": 1.3499984741210938,
395
+ "learning_rate": 0.00023733661732216452,
396
+ "loss": 2.3292,
397
+ "step": 52
398
+ },
399
+ {
400
+ "epoch": 0.6908350305498981,
401
+ "grad_norm": 1.2178796529769897,
402
+ "learning_rate": 0.00023493364626537257,
403
+ "loss": 2.2938,
404
+ "step": 53
405
+ },
406
+ {
407
+ "epoch": 0.7038696537678207,
408
+ "grad_norm": 0.9463868737220764,
409
+ "learning_rate": 0.00023249999999999999,
410
+ "loss": 2.2173,
411
+ "step": 54
412
+ },
413
+ {
414
+ "epoch": 0.7169042769857433,
415
+ "grad_norm": 0.5750055909156799,
416
+ "learning_rate": 0.00023003674600373153,
417
+ "loss": 2.2036,
418
+ "step": 55
419
+ },
420
+ {
421
+ "epoch": 0.729938900203666,
422
+ "grad_norm": 0.3371862769126892,
423
+ "learning_rate": 0.00022754496474118133,
424
+ "loss": 2.167,
425
+ "step": 56
426
+ },
427
+ {
428
+ "epoch": 0.7429735234215886,
429
+ "grad_norm": 0.6258431673049927,
430
+ "learning_rate": 0.00022502574918996517,
431
+ "loss": 2.2211,
432
+ "step": 57
433
+ },
434
+ {
435
+ "epoch": 0.7560081466395112,
436
+ "grad_norm": 0.7280526161193848,
437
+ "learning_rate": 0.00022248020436128478,
438
+ "loss": 2.2122,
439
+ "step": 58
440
+ },
441
+ {
442
+ "epoch": 0.7690427698574338,
443
+ "grad_norm": 0.8275133371353149,
444
+ "learning_rate": 0.00021990944681523302,
445
+ "loss": 2.2137,
446
+ "step": 59
447
+ },
448
+ {
449
+ "epoch": 0.7820773930753564,
450
+ "grad_norm": 0.8840050101280212,
451
+ "learning_rate": 0.0002173146041710339,
452
+ "loss": 2.2311,
453
+ "step": 60
454
+ },
455
+ {
456
+ "epoch": 0.795112016293279,
457
+ "grad_norm": 0.957916796207428,
458
+ "learning_rate": 0.00021469681461243153,
459
+ "loss": 2.2478,
460
+ "step": 61
461
+ },
462
+ {
463
+ "epoch": 0.8081466395112016,
464
+ "grad_norm": 1.0393691062927246,
465
+ "learning_rate": 0.00021205722638844505,
466
+ "loss": 2.2579,
467
+ "step": 62
468
+ },
469
+ {
470
+ "epoch": 0.8211812627291243,
471
+ "grad_norm": 0.3389835059642792,
472
+ "learning_rate": 0.00020939699730970873,
473
+ "loss": 2.1799,
474
+ "step": 63
475
+ },
476
+ {
477
+ "epoch": 0.8342158859470469,
478
+ "grad_norm": 0.5756048560142517,
479
+ "learning_rate": 0.00020671729424061788,
480
+ "loss": 2.1684,
481
+ "step": 64
482
+ },
483
+ {
484
+ "epoch": 0.8472505091649695,
485
+ "grad_norm": 0.7830222249031067,
486
+ "learning_rate": 0.00020401929258750365,
487
+ "loss": 2.1438,
488
+ "step": 65
489
+ },
490
+ {
491
+ "epoch": 0.8602851323828921,
492
+ "grad_norm": 0.8117411732673645,
493
+ "learning_rate": 0.00020130417578306082,
494
+ "loss": 2.1536,
495
+ "step": 66
496
+ },
497
+ {
498
+ "epoch": 0.8733197556008147,
499
+ "grad_norm": 0.7848119735717773,
500
+ "learning_rate": 0.0001985731347672554,
501
+ "loss": 2.129,
502
+ "step": 67
503
+ },
504
+ {
505
+ "epoch": 0.8863543788187372,
506
+ "grad_norm": 0.6703603863716125,
507
+ "learning_rate": 0.00019582736746493853,
508
+ "loss": 2.152,
509
+ "step": 68
510
+ },
511
+ {
512
+ "epoch": 0.8993890020366598,
513
+ "grad_norm": 0.4845719635486603,
514
+ "learning_rate": 0.00019306807826039747,
515
+ "loss": 2.1237,
516
+ "step": 69
517
+ },
518
+ {
519
+ "epoch": 0.9124236252545825,
520
+ "grad_norm": 0.3355918228626251,
521
+ "learning_rate": 0.00019029647746907283,
522
+ "loss": 2.0711,
523
+ "step": 70
524
+ },
525
+ {
526
+ "epoch": 0.9254582484725051,
527
+ "grad_norm": 0.33707523345947266,
528
+ "learning_rate": 0.00018751378080667378,
529
+ "loss": 2.1033,
530
+ "step": 71
531
+ },
532
+ {
533
+ "epoch": 0.9384928716904277,
534
+ "grad_norm": 0.5474686622619629,
535
+ "learning_rate": 0.00018472120885592555,
536
+ "loss": 2.0707,
537
+ "step": 72
538
+ },
539
+ {
540
+ "epoch": 0.9515274949083503,
541
+ "grad_norm": 0.71000075340271,
542
+ "learning_rate": 0.00018191998653118108,
543
+ "loss": 2.1169,
544
+ "step": 73
545
+ },
546
+ {
547
+ "epoch": 0.9645621181262729,
548
+ "grad_norm": 0.8560431003570557,
549
+ "learning_rate": 0.0001791113425411332,
550
+ "loss": 2.176,
551
+ "step": 74
552
+ },
553
+ {
554
+ "epoch": 0.9775967413441955,
555
+ "grad_norm": 1.2276349067687988,
556
+ "learning_rate": 0.0001762965088498626,
557
+ "loss": 2.2966,
558
+ "step": 75
559
+ },
560
+ {
561
+ "epoch": 0.9775967413441955,
562
+ "eval_loss": 2.044175148010254,
563
+ "eval_runtime": 0.2806,
564
+ "eval_samples_per_second": 178.197,
565
+ "eval_steps_per_second": 46.331,
566
+ "step": 75
567
+ },
568
+ {
569
+ "epoch": 0.9906313645621181,
570
+ "grad_norm": 0.28324252367019653,
571
+ "learning_rate": 0.0001734767201364573,
572
+ "loss": 2.0594,
573
+ "step": 76
574
+ },
575
+ {
576
+ "epoch": 1.0036659877800407,
577
+ "grad_norm": 0.4252139627933502,
578
+ "learning_rate": 0.00017065321325344194,
579
+ "loss": 2.9023,
580
+ "step": 77
581
+ },
582
+ {
583
+ "epoch": 1.0167006109979633,
584
+ "grad_norm": 0.532595157623291,
585
+ "learning_rate": 0.00016782722668425316,
586
+ "loss": 1.9476,
587
+ "step": 78
588
+ },
589
+ {
590
+ "epoch": 1.0297352342158859,
591
+ "grad_norm": 0.696269690990448,
592
+ "learning_rate": 0.000165,
593
+ "loss": 2.089,
594
+ "step": 79
595
+ },
596
+ {
597
+ "epoch": 1.0427698574338085,
598
+ "grad_norm": 0.6571292281150818,
599
+ "learning_rate": 0.00016217277331574678,
600
+ "loss": 2.0514,
601
+ "step": 80
602
+ },
603
+ {
604
+ "epoch": 1.055804480651731,
605
+ "grad_norm": 0.5735260248184204,
606
+ "learning_rate": 0.00015934678674655805,
607
+ "loss": 2.0645,
608
+ "step": 81
609
+ },
610
+ {
611
+ "epoch": 1.0688391038696539,
612
+ "grad_norm": 0.418550968170166,
613
+ "learning_rate": 0.0001565232798635427,
614
+ "loss": 2.0639,
615
+ "step": 82
616
+ },
617
+ {
618
+ "epoch": 1.0818737270875765,
619
+ "grad_norm": 0.32165512442588806,
620
+ "learning_rate": 0.00015370349115013742,
621
+ "loss": 2.0412,
622
+ "step": 83
623
+ },
624
+ {
625
+ "epoch": 1.094908350305499,
626
+ "grad_norm": 0.3411344289779663,
627
+ "learning_rate": 0.0001508886574588668,
628
+ "loss": 2.0738,
629
+ "step": 84
630
+ },
631
+ {
632
+ "epoch": 1.1079429735234216,
633
+ "grad_norm": 0.4526989459991455,
634
+ "learning_rate": 0.0001480800134688189,
635
+ "loss": 2.0482,
636
+ "step": 85
637
+ },
638
+ {
639
+ "epoch": 1.1209775967413442,
640
+ "grad_norm": 0.5264050960540771,
641
+ "learning_rate": 0.00014527879114407445,
642
+ "loss": 2.0155,
643
+ "step": 86
644
+ },
645
+ {
646
+ "epoch": 1.1340122199592668,
647
+ "grad_norm": 0.6333541870117188,
648
+ "learning_rate": 0.0001424862191933262,
649
+ "loss": 2.029,
650
+ "step": 87
651
+ },
652
+ {
653
+ "epoch": 1.1470468431771894,
654
+ "grad_norm": 0.6475998759269714,
655
+ "learning_rate": 0.00013970352253092714,
656
+ "loss": 2.0732,
657
+ "step": 88
658
+ },
659
+ {
660
+ "epoch": 1.160081466395112,
661
+ "grad_norm": 0.5682183504104614,
662
+ "learning_rate": 0.00013693192173960253,
663
+ "loss": 1.6717,
664
+ "step": 89
665
+ },
666
+ {
667
+ "epoch": 1.1731160896130346,
668
+ "grad_norm": 0.48593777418136597,
669
+ "learning_rate": 0.00013417263253506147,
670
+ "loss": 2.5498,
671
+ "step": 90
672
+ },
673
+ {
674
+ "epoch": 1.1861507128309572,
675
+ "grad_norm": 0.517917811870575,
676
+ "learning_rate": 0.00013142686523274463,
677
+ "loss": 2.0097,
678
+ "step": 91
679
+ },
680
+ {
681
+ "epoch": 1.1991853360488798,
682
+ "grad_norm": 0.5828862190246582,
683
+ "learning_rate": 0.00012869582421693912,
684
+ "loss": 1.9987,
685
+ "step": 92
686
+ },
687
+ {
688
+ "epoch": 1.2122199592668024,
689
+ "grad_norm": 0.5273678302764893,
690
+ "learning_rate": 0.00012598070741249632,
691
+ "loss": 2.0205,
692
+ "step": 93
693
+ },
694
+ {
695
+ "epoch": 1.225254582484725,
696
+ "grad_norm": 0.49020346999168396,
697
+ "learning_rate": 0.00012328270575938212,
698
+ "loss": 1.9981,
699
+ "step": 94
700
+ },
701
+ {
702
+ "epoch": 1.2382892057026478,
703
+ "grad_norm": 0.35303086042404175,
704
+ "learning_rate": 0.00012060300269029128,
705
+ "loss": 1.9699,
706
+ "step": 95
707
+ },
708
+ {
709
+ "epoch": 1.2513238289205701,
710
+ "grad_norm": 0.27531367540359497,
711
+ "learning_rate": 0.00011794277361155495,
712
+ "loss": 2.0035,
713
+ "step": 96
714
+ },
715
+ {
716
+ "epoch": 1.264358452138493,
717
+ "grad_norm": 0.31903597712516785,
718
+ "learning_rate": 0.00011530318538756846,
719
+ "loss": 1.9783,
720
+ "step": 97
721
+ },
722
+ {
723
+ "epoch": 1.2773930753564156,
724
+ "grad_norm": 0.43081673979759216,
725
+ "learning_rate": 0.0001126853958289661,
726
+ "loss": 2.0053,
727
+ "step": 98
728
+ },
729
+ {
730
+ "epoch": 1.2904276985743381,
731
+ "grad_norm": 0.5114902257919312,
732
+ "learning_rate": 0.00011009055318476698,
733
+ "loss": 2.0032,
734
+ "step": 99
735
+ },
736
+ {
737
+ "epoch": 1.3034623217922607,
738
+ "grad_norm": 0.6454872488975525,
739
+ "learning_rate": 0.00010751979563871518,
740
+ "loss": 2.0436,
741
+ "step": 100
742
+ },
743
+ {
744
+ "epoch": 1.3034623217922607,
745
+ "eval_loss": 1.9786142110824585,
746
+ "eval_runtime": 0.2787,
747
+ "eval_samples_per_second": 179.386,
748
+ "eval_steps_per_second": 46.64,
749
+ "step": 100
750
  }
751
  ],
752
  "logging_steps": 1,
 
775
  "attributes": {}
776
  }
777
  },
778
+ "total_flos": 4.44280275468288e+16,
779
  "train_batch_size": 1,
780
  "trial_name": null,
781
  "trial_params": null