diaenra commited on
Commit
ab733b6
·
verified ·
1 Parent(s): 940f06f

Training in progress, step 1956, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e893d538403d4ac222e2baaf746a33535ee8031c07cf1939cc3355ea15106a0
3
  size 2503003904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e0a562a914d2be2b4ee279fe187629b69bd04971e3acab4f1c60939e5ec5996
3
  size 2503003904
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbf245997dbc83cd89bcfb5067dfa742724b5f13ff1993cd0ad6d3d60a4c987a
3
  size 5006244836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9234c66d2cf43b48b78afca3f04cf7c13b9d3436c6cfdb169398c3ecd80cfe02
3
  size 5006244836
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12f09fa1a152c2febaa1b0be3c98d7abd70a22c5965d994af5b7173cc3e6ff7f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48c25ffa179744c0719c7b65566206a3ffbc025b1b73bf62d6945f9035c21dfa
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c767cedc54b733779ba8a20f635d848598fd89e5cfee0706f6c63df8c1e6b2d8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98f465f8ef34d3200760108c9ddb9bd27e97b140bce5b4d84a91de037dadb420
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9775051124744376,
5
  "eval_steps": 500,
6
- "global_step": 1912,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -13391,6 +13391,314 @@
13391
  "learning_rate": 1.3860803461989146e-07,
13392
  "loss": 0.8676,
13393
  "step": 1912
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13394
  }
13395
  ],
13396
  "logging_steps": 1,
@@ -13405,12 +13713,12 @@
13405
  "should_evaluate": false,
13406
  "should_log": false,
13407
  "should_save": true,
13408
- "should_training_stop": false
13409
  },
13410
  "attributes": {}
13411
  }
13412
  },
13413
- "total_flos": 7.310480002095514e+17,
13414
  "train_batch_size": 8,
13415
  "trial_name": null,
13416
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 1956,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
13391
  "learning_rate": 1.3860803461989146e-07,
13392
  "loss": 0.8676,
13393
  "step": 1912
13394
+ },
13395
+ {
13396
+ "epoch": 0.9780163599182005,
13397
+ "grad_norm": 2.835423469543457,
13398
+ "learning_rate": 1.3238201455040844e-07,
13399
+ "loss": 0.8435,
13400
+ "step": 1913
13401
+ },
13402
+ {
13403
+ "epoch": 0.9785276073619632,
13404
+ "grad_norm": 3.2276833057403564,
13405
+ "learning_rate": 1.2629887148061792e-07,
13406
+ "loss": 0.9043,
13407
+ "step": 1914
13408
+ },
13409
+ {
13410
+ "epoch": 0.9790388548057259,
13411
+ "grad_norm": 3.260972261428833,
13412
+ "learning_rate": 1.203586228395004e-07,
13413
+ "loss": 0.9502,
13414
+ "step": 1915
13415
+ },
13416
+ {
13417
+ "epoch": 0.9795501022494888,
13418
+ "grad_norm": 3.3064229488372803,
13419
+ "learning_rate": 1.1456128564660273e-07,
13420
+ "loss": 0.9968,
13421
+ "step": 1916
13422
+ },
13423
+ {
13424
+ "epoch": 0.9800613496932515,
13425
+ "grad_norm": 3.274178981781006,
13426
+ "learning_rate": 1.0890687651203823e-07,
13427
+ "loss": 0.8302,
13428
+ "step": 1917
13429
+ },
13430
+ {
13431
+ "epoch": 0.9805725971370143,
13432
+ "grad_norm": 3.076536178588867,
13433
+ "learning_rate": 1.0339541163639776e-07,
13434
+ "loss": 0.9421,
13435
+ "step": 1918
13436
+ },
13437
+ {
13438
+ "epoch": 0.9810838445807771,
13439
+ "grad_norm": 3.247903823852539,
13440
+ "learning_rate": 9.802690681071647e-08,
13441
+ "loss": 0.9819,
13442
+ "step": 1919
13443
+ },
13444
+ {
13445
+ "epoch": 0.9815950920245399,
13446
+ "grad_norm": 3.3538260459899902,
13447
+ "learning_rate": 9.280137741643491e-08,
13448
+ "loss": 0.8744,
13449
+ "step": 1920
13450
+ },
13451
+ {
13452
+ "epoch": 0.9821063394683026,
13453
+ "grad_norm": 3.515782356262207,
13454
+ "learning_rate": 8.771883842536021e-08,
13455
+ "loss": 0.9124,
13456
+ "step": 1921
13457
+ },
13458
+ {
13459
+ "epoch": 0.9826175869120655,
13460
+ "grad_norm": 3.6226806640625,
13461
+ "learning_rate": 8.277930439959946e-08,
13462
+ "loss": 0.9011,
13463
+ "step": 1922
13464
+ },
13465
+ {
13466
+ "epoch": 0.9831288343558282,
13467
+ "grad_norm": 3.3394203186035156,
13468
+ "learning_rate": 7.798278949154303e-08,
13469
+ "loss": 0.8316,
13470
+ "step": 1923
13471
+ },
13472
+ {
13473
+ "epoch": 0.983640081799591,
13474
+ "grad_norm": 3.246371030807495,
13475
+ "learning_rate": 7.332930744380906e-08,
13476
+ "loss": 0.8556,
13477
+ "step": 1924
13478
+ },
13479
+ {
13480
+ "epoch": 0.9841513292433538,
13481
+ "grad_norm": 3.402927875518799,
13482
+ "learning_rate": 6.881887158920464e-08,
13483
+ "loss": 0.7978,
13484
+ "step": 1925
13485
+ },
13486
+ {
13487
+ "epoch": 0.9846625766871165,
13488
+ "grad_norm": 3.8112809658050537,
13489
+ "learning_rate": 6.445149485070357e-08,
13490
+ "loss": 0.9133,
13491
+ "step": 1926
13492
+ },
13493
+ {
13494
+ "epoch": 0.9851738241308794,
13495
+ "grad_norm": 3.5460119247436523,
13496
+ "learning_rate": 6.022718974137975e-08,
13497
+ "loss": 0.8158,
13498
+ "step": 1927
13499
+ },
13500
+ {
13501
+ "epoch": 0.9856850715746421,
13502
+ "grad_norm": 3.341395854949951,
13503
+ "learning_rate": 5.614596836440722e-08,
13504
+ "loss": 0.8246,
13505
+ "step": 1928
13506
+ },
13507
+ {
13508
+ "epoch": 0.9861963190184049,
13509
+ "grad_norm": 3.6873090267181396,
13510
+ "learning_rate": 5.2207842412999034e-08,
13511
+ "loss": 0.8714,
13512
+ "step": 1929
13513
+ },
13514
+ {
13515
+ "epoch": 0.9867075664621677,
13516
+ "grad_norm": 3.4815688133239746,
13517
+ "learning_rate": 4.841282317037399e-08,
13518
+ "loss": 0.8948,
13519
+ "step": 1930
13520
+ },
13521
+ {
13522
+ "epoch": 0.9872188139059305,
13523
+ "grad_norm": 3.5316038131713867,
13524
+ "learning_rate": 4.476092150975109e-08,
13525
+ "loss": 0.8622,
13526
+ "step": 1931
13527
+ },
13528
+ {
13529
+ "epoch": 0.9877300613496932,
13530
+ "grad_norm": 3.5975794792175293,
13531
+ "learning_rate": 4.1252147894277336e-08,
13532
+ "loss": 0.881,
13533
+ "step": 1932
13534
+ },
13535
+ {
13536
+ "epoch": 0.9882413087934561,
13537
+ "grad_norm": 3.441171646118164,
13538
+ "learning_rate": 3.7886512377033334e-08,
13539
+ "loss": 0.8396,
13540
+ "step": 1933
13541
+ },
13542
+ {
13543
+ "epoch": 0.9887525562372188,
13544
+ "grad_norm": 3.8511383533477783,
13545
+ "learning_rate": 3.4664024600988835e-08,
13546
+ "loss": 0.9208,
13547
+ "step": 1934
13548
+ },
13549
+ {
13550
+ "epoch": 0.9892638036809815,
13551
+ "grad_norm": 3.8687822818756104,
13552
+ "learning_rate": 3.158469379898055e-08,
13553
+ "loss": 0.9135,
13554
+ "step": 1935
13555
+ },
13556
+ {
13557
+ "epoch": 0.9897750511247444,
13558
+ "grad_norm": 3.593276023864746,
13559
+ "learning_rate": 2.8648528793673302e-08,
13560
+ "loss": 0.8474,
13561
+ "step": 1936
13562
+ },
13563
+ {
13564
+ "epoch": 0.9902862985685071,
13565
+ "grad_norm": 4.0986127853393555,
13566
+ "learning_rate": 2.5855537997548917e-08,
13567
+ "loss": 0.8883,
13568
+ "step": 1937
13569
+ },
13570
+ {
13571
+ "epoch": 0.99079754601227,
13572
+ "grad_norm": 4.03285551071167,
13573
+ "learning_rate": 2.3205729412884016e-08,
13574
+ "loss": 0.7779,
13575
+ "step": 1938
13576
+ },
13577
+ {
13578
+ "epoch": 0.9913087934560327,
13579
+ "grad_norm": 4.346153736114502,
13580
+ "learning_rate": 2.0699110631711148e-08,
13581
+ "loss": 0.8757,
13582
+ "step": 1939
13583
+ },
13584
+ {
13585
+ "epoch": 0.9918200408997955,
13586
+ "grad_norm": 4.283609390258789,
13587
+ "learning_rate": 1.8335688835802167e-08,
13588
+ "loss": 0.8173,
13589
+ "step": 1940
13590
+ },
13591
+ {
13592
+ "epoch": 0.9923312883435583,
13593
+ "grad_norm": 4.301876068115234,
13594
+ "learning_rate": 1.6115470796662647e-08,
13595
+ "loss": 0.9134,
13596
+ "step": 1941
13597
+ },
13598
+ {
13599
+ "epoch": 0.9928425357873211,
13600
+ "grad_norm": 4.885223865509033,
13601
+ "learning_rate": 1.4038462875504143e-08,
13602
+ "loss": 0.8289,
13603
+ "step": 1942
13604
+ },
13605
+ {
13606
+ "epoch": 0.9933537832310838,
13607
+ "grad_norm": 4.63042688369751,
13608
+ "learning_rate": 1.2104671023199787e-08,
13609
+ "loss": 0.8625,
13610
+ "step": 1943
13611
+ },
13612
+ {
13613
+ "epoch": 0.9938650306748467,
13614
+ "grad_norm": 4.702084064483643,
13615
+ "learning_rate": 1.0314100780317581e-08,
13616
+ "loss": 0.9342,
13617
+ "step": 1944
13618
+ },
13619
+ {
13620
+ "epoch": 0.9943762781186094,
13621
+ "grad_norm": 4.4585771560668945,
13622
+ "learning_rate": 8.666757277064897e-09,
13623
+ "loss": 0.6828,
13624
+ "step": 1945
13625
+ },
13626
+ {
13627
+ "epoch": 0.9948875255623721,
13628
+ "grad_norm": 4.869369029998779,
13629
+ "learning_rate": 7.162645233282916e-09,
13630
+ "loss": 0.8505,
13631
+ "step": 1946
13632
+ },
13633
+ {
13634
+ "epoch": 0.995398773006135,
13635
+ "grad_norm": 4.623004913330078,
13636
+ "learning_rate": 5.8017689584521915e-09,
13637
+ "loss": 0.6772,
13638
+ "step": 1947
13639
+ },
13640
+ {
13641
+ "epoch": 0.9959100204498977,
13642
+ "grad_norm": 5.718740940093994,
13643
+ "learning_rate": 4.584132351642678e-09,
13644
+ "loss": 0.8251,
13645
+ "step": 1948
13646
+ },
13647
+ {
13648
+ "epoch": 0.9964212678936605,
13649
+ "grad_norm": 5.196649551391602,
13650
+ "learning_rate": 3.509738901547044e-09,
13651
+ "loss": 0.6039,
13652
+ "step": 1949
13653
+ },
13654
+ {
13655
+ "epoch": 0.9969325153374233,
13656
+ "grad_norm": 6.253082752227783,
13657
+ "learning_rate": 2.5785916864307092e-09,
13658
+ "loss": 0.5829,
13659
+ "step": 1950
13660
+ },
13661
+ {
13662
+ "epoch": 0.9974437627811861,
13663
+ "grad_norm": 2.393533229827881,
13664
+ "learning_rate": 1.7906933741484999e-09,
13665
+ "loss": 0.869,
13666
+ "step": 1951
13667
+ },
13668
+ {
13669
+ "epoch": 0.9979550102249489,
13670
+ "grad_norm": 2.994673728942871,
13671
+ "learning_rate": 1.1460462221279944e-09,
13672
+ "loss": 0.9103,
13673
+ "step": 1952
13674
+ },
13675
+ {
13676
+ "epoch": 0.9984662576687117,
13677
+ "grad_norm": 3.5846893787384033,
13678
+ "learning_rate": 6.446520773695231e-10,
13679
+ "loss": 0.8366,
13680
+ "step": 1953
13681
+ },
13682
+ {
13683
+ "epoch": 0.9989775051124744,
13684
+ "grad_norm": 3.5298497676849365,
13685
+ "learning_rate": 2.8651237642396414e-10,
13686
+ "loss": 0.7694,
13687
+ "step": 1954
13688
+ },
13689
+ {
13690
+ "epoch": 0.9994887525562373,
13691
+ "grad_norm": 3.686530828475952,
13692
+ "learning_rate": 7.162814541494811e-11,
13693
+ "loss": 0.67,
13694
+ "step": 1955
13695
+ },
13696
+ {
13697
+ "epoch": 1.0,
13698
+ "grad_norm": 5.693774223327637,
13699
+ "learning_rate": 0.0,
13700
+ "loss": 0.7087,
13701
+ "step": 1956
13702
  }
13703
  ],
13704
  "logging_steps": 1,
 
13713
  "should_evaluate": false,
13714
  "should_log": false,
13715
  "should_save": true,
13716
+ "should_training_stop": true
13717
  },
13718
  "attributes": {}
13719
  }
13720
  },
13721
+ "total_flos": 7.478583213010452e+17,
13722
  "train_batch_size": 8,
13723
  "trial_name": null,
13724
  "trial_params": null