gabrielaltay commited on
Commit
ceff349
1 Parent(s): 85d8d99

Training in progress, step 8088, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93f3cbe3539383258517ffe4d796cd2ea59b217bda392c96bc59ec14b13b4956
3
  size 654946216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bad36090d3fea6bf5c95ba42db0a60c3eebb874b00583f59320f54010495d068
3
  size 654946216
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a519e50d7090c152cd50ac4ed93fcc2e0c2176e250fd0b45104b650a9b7d7be6
3
  size 1310000698
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6081f069ca372d13b29b3847db2ea0377d5705d67197467dc5eb4bba64d27645
3
  size 1310000698
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a530f352eab124a5914d9e0abc72b290b5b7b4112d9d14b915fc62d08dcd476e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57e93e92bbbccd397547149085c6875813b56fd6bea353a98cf0179f1892adb6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7002077767883645,
5
  "eval_steps": 500,
6
- "global_step": 7077,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -12390,6 +12390,1777 @@
12390
  "learning_rate": 1.4994558226971406e-05,
12391
  "loss": 2.4362,
12392
  "step": 7076
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12393
  }
12394
  ],
12395
  "logging_steps": 4,
@@ -12397,7 +14168,7 @@
12397
  "num_input_tokens_seen": 0,
12398
  "num_train_epochs": 1,
12399
  "save_steps": 1011,
12400
- "total_flos": 8.655197054592614e+16,
12401
  "train_batch_size": 4,
12402
  "trial_name": null,
12403
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8002374591867023,
5
  "eval_steps": 500,
6
+ "global_step": 8088,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
12390
  "learning_rate": 1.4994558226971406e-05,
12391
  "loss": 2.4362,
12392
  "step": 7076
12393
+ },
12394
+ {
12395
+ "epoch": 0.7,
12396
+ "grad_norm": 1.5943050384521484,
12397
+ "learning_rate": 1.4974769961412883e-05,
12398
+ "loss": 2.5309,
12399
+ "step": 7080
12400
+ },
12401
+ {
12402
+ "epoch": 0.7,
12403
+ "grad_norm": 1.7004754543304443,
12404
+ "learning_rate": 1.4954981695854359e-05,
12405
+ "loss": 2.573,
12406
+ "step": 7084
12407
+ },
12408
+ {
12409
+ "epoch": 0.7,
12410
+ "grad_norm": 1.6682156324386597,
12411
+ "learning_rate": 1.4935193430295835e-05,
12412
+ "loss": 2.5718,
12413
+ "step": 7088
12414
+ },
12415
+ {
12416
+ "epoch": 0.7,
12417
+ "grad_norm": 1.489130973815918,
12418
+ "learning_rate": 1.4915405164737312e-05,
12419
+ "loss": 2.7728,
12420
+ "step": 7092
12421
+ },
12422
+ {
12423
+ "epoch": 0.7,
12424
+ "grad_norm": 1.72616708278656,
12425
+ "learning_rate": 1.4895616899178788e-05,
12426
+ "loss": 2.6457,
12427
+ "step": 7096
12428
+ },
12429
+ {
12430
+ "epoch": 0.7,
12431
+ "grad_norm": 1.5968120098114014,
12432
+ "learning_rate": 1.4875828633620264e-05,
12433
+ "loss": 2.4886,
12434
+ "step": 7100
12435
+ },
12436
+ {
12437
+ "epoch": 0.7,
12438
+ "grad_norm": 1.7065930366516113,
12439
+ "learning_rate": 1.485604036806174e-05,
12440
+ "loss": 2.6894,
12441
+ "step": 7104
12442
+ },
12443
+ {
12444
+ "epoch": 0.7,
12445
+ "grad_norm": 1.8437420129776,
12446
+ "learning_rate": 1.4836252102503217e-05,
12447
+ "loss": 2.6103,
12448
+ "step": 7108
12449
+ },
12450
+ {
12451
+ "epoch": 0.7,
12452
+ "grad_norm": 1.5512233972549438,
12453
+ "learning_rate": 1.4816463836944692e-05,
12454
+ "loss": 2.4657,
12455
+ "step": 7112
12456
+ },
12457
+ {
12458
+ "epoch": 0.7,
12459
+ "grad_norm": 1.6278142929077148,
12460
+ "learning_rate": 1.4796675571386168e-05,
12461
+ "loss": 2.7127,
12462
+ "step": 7116
12463
+ },
12464
+ {
12465
+ "epoch": 0.7,
12466
+ "grad_norm": 1.5600184202194214,
12467
+ "learning_rate": 1.4776887305827644e-05,
12468
+ "loss": 2.4707,
12469
+ "step": 7120
12470
+ },
12471
+ {
12472
+ "epoch": 0.7,
12473
+ "grad_norm": 1.5605080127716064,
12474
+ "learning_rate": 1.475709904026912e-05,
12475
+ "loss": 2.5009,
12476
+ "step": 7124
12477
+ },
12478
+ {
12479
+ "epoch": 0.71,
12480
+ "grad_norm": 1.6376603841781616,
12481
+ "learning_rate": 1.4737310774710597e-05,
12482
+ "loss": 2.4116,
12483
+ "step": 7128
12484
+ },
12485
+ {
12486
+ "epoch": 0.71,
12487
+ "grad_norm": 1.5430164337158203,
12488
+ "learning_rate": 1.4717522509152073e-05,
12489
+ "loss": 2.4315,
12490
+ "step": 7132
12491
+ },
12492
+ {
12493
+ "epoch": 0.71,
12494
+ "grad_norm": 1.9249303340911865,
12495
+ "learning_rate": 1.469773424359355e-05,
12496
+ "loss": 2.5128,
12497
+ "step": 7136
12498
+ },
12499
+ {
12500
+ "epoch": 0.71,
12501
+ "grad_norm": 1.914553165435791,
12502
+ "learning_rate": 1.4677945978035026e-05,
12503
+ "loss": 2.5726,
12504
+ "step": 7140
12505
+ },
12506
+ {
12507
+ "epoch": 0.71,
12508
+ "grad_norm": 1.6890403032302856,
12509
+ "learning_rate": 1.4658157712476502e-05,
12510
+ "loss": 2.5233,
12511
+ "step": 7144
12512
+ },
12513
+ {
12514
+ "epoch": 0.71,
12515
+ "grad_norm": 1.6215218305587769,
12516
+ "learning_rate": 1.463836944691798e-05,
12517
+ "loss": 2.4541,
12518
+ "step": 7148
12519
+ },
12520
+ {
12521
+ "epoch": 0.71,
12522
+ "grad_norm": 1.6023436784744263,
12523
+ "learning_rate": 1.4618581181359457e-05,
12524
+ "loss": 2.5825,
12525
+ "step": 7152
12526
+ },
12527
+ {
12528
+ "epoch": 0.71,
12529
+ "grad_norm": 1.6800665855407715,
12530
+ "learning_rate": 1.459879291580093e-05,
12531
+ "loss": 2.5275,
12532
+ "step": 7156
12533
+ },
12534
+ {
12535
+ "epoch": 0.71,
12536
+ "grad_norm": 1.5266000032424927,
12537
+ "learning_rate": 1.4579004650242406e-05,
12538
+ "loss": 2.6414,
12539
+ "step": 7160
12540
+ },
12541
+ {
12542
+ "epoch": 0.71,
12543
+ "grad_norm": 1.660843014717102,
12544
+ "learning_rate": 1.4559216384683882e-05,
12545
+ "loss": 2.4934,
12546
+ "step": 7164
12547
+ },
12548
+ {
12549
+ "epoch": 0.71,
12550
+ "grad_norm": 1.5643970966339111,
12551
+ "learning_rate": 1.4539428119125358e-05,
12552
+ "loss": 2.4847,
12553
+ "step": 7168
12554
+ },
12555
+ {
12556
+ "epoch": 0.71,
12557
+ "grad_norm": 1.4681652784347534,
12558
+ "learning_rate": 1.4519639853566835e-05,
12559
+ "loss": 2.423,
12560
+ "step": 7172
12561
+ },
12562
+ {
12563
+ "epoch": 0.71,
12564
+ "grad_norm": 1.5900237560272217,
12565
+ "learning_rate": 1.4499851588008311e-05,
12566
+ "loss": 2.5645,
12567
+ "step": 7176
12568
+ },
12569
+ {
12570
+ "epoch": 0.71,
12571
+ "grad_norm": 1.6541297435760498,
12572
+ "learning_rate": 1.4480063322449789e-05,
12573
+ "loss": 2.5434,
12574
+ "step": 7180
12575
+ },
12576
+ {
12577
+ "epoch": 0.71,
12578
+ "grad_norm": 1.5105384588241577,
12579
+ "learning_rate": 1.4460275056891265e-05,
12580
+ "loss": 2.6391,
12581
+ "step": 7184
12582
+ },
12583
+ {
12584
+ "epoch": 0.71,
12585
+ "grad_norm": 1.6248685121536255,
12586
+ "learning_rate": 1.4440486791332742e-05,
12587
+ "loss": 2.6772,
12588
+ "step": 7188
12589
+ },
12590
+ {
12591
+ "epoch": 0.71,
12592
+ "grad_norm": 1.559591293334961,
12593
+ "learning_rate": 1.4420698525774218e-05,
12594
+ "loss": 2.5175,
12595
+ "step": 7192
12596
+ },
12597
+ {
12598
+ "epoch": 0.71,
12599
+ "grad_norm": 1.73567533493042,
12600
+ "learning_rate": 1.4400910260215691e-05,
12601
+ "loss": 2.5799,
12602
+ "step": 7196
12603
+ },
12604
+ {
12605
+ "epoch": 0.71,
12606
+ "grad_norm": 1.602149248123169,
12607
+ "learning_rate": 1.4381121994657167e-05,
12608
+ "loss": 2.5475,
12609
+ "step": 7200
12610
+ },
12611
+ {
12612
+ "epoch": 0.71,
12613
+ "grad_norm": 1.6269313097000122,
12614
+ "learning_rate": 1.4361333729098644e-05,
12615
+ "loss": 2.5729,
12616
+ "step": 7204
12617
+ },
12618
+ {
12619
+ "epoch": 0.71,
12620
+ "grad_norm": 1.5919511318206787,
12621
+ "learning_rate": 1.4341545463540122e-05,
12622
+ "loss": 2.4055,
12623
+ "step": 7208
12624
+ },
12625
+ {
12626
+ "epoch": 0.71,
12627
+ "grad_norm": 1.9625754356384277,
12628
+ "learning_rate": 1.4321757197981598e-05,
12629
+ "loss": 2.6867,
12630
+ "step": 7212
12631
+ },
12632
+ {
12633
+ "epoch": 0.71,
12634
+ "grad_norm": 1.7777873277664185,
12635
+ "learning_rate": 1.4301968932423074e-05,
12636
+ "loss": 2.7518,
12637
+ "step": 7216
12638
+ },
12639
+ {
12640
+ "epoch": 0.71,
12641
+ "grad_norm": 1.632881760597229,
12642
+ "learning_rate": 1.428218066686455e-05,
12643
+ "loss": 2.3954,
12644
+ "step": 7220
12645
+ },
12646
+ {
12647
+ "epoch": 0.71,
12648
+ "grad_norm": 1.7910826206207275,
12649
+ "learning_rate": 1.4262392401306027e-05,
12650
+ "loss": 2.5477,
12651
+ "step": 7224
12652
+ },
12653
+ {
12654
+ "epoch": 0.72,
12655
+ "grad_norm": 1.6172887086868286,
12656
+ "learning_rate": 1.4242604135747503e-05,
12657
+ "loss": 2.6665,
12658
+ "step": 7228
12659
+ },
12660
+ {
12661
+ "epoch": 0.72,
12662
+ "grad_norm": 1.7775293588638306,
12663
+ "learning_rate": 1.422281587018898e-05,
12664
+ "loss": 2.5237,
12665
+ "step": 7232
12666
+ },
12667
+ {
12668
+ "epoch": 0.72,
12669
+ "grad_norm": 1.6558274030685425,
12670
+ "learning_rate": 1.4203027604630456e-05,
12671
+ "loss": 2.5909,
12672
+ "step": 7236
12673
+ },
12674
+ {
12675
+ "epoch": 0.72,
12676
+ "grad_norm": 1.6271092891693115,
12677
+ "learning_rate": 1.418323933907193e-05,
12678
+ "loss": 2.5299,
12679
+ "step": 7240
12680
+ },
12681
+ {
12682
+ "epoch": 0.72,
12683
+ "grad_norm": 1.5768835544586182,
12684
+ "learning_rate": 1.4163451073513407e-05,
12685
+ "loss": 2.5376,
12686
+ "step": 7244
12687
+ },
12688
+ {
12689
+ "epoch": 0.72,
12690
+ "grad_norm": 1.641741156578064,
12691
+ "learning_rate": 1.4143662807954883e-05,
12692
+ "loss": 2.5802,
12693
+ "step": 7248
12694
+ },
12695
+ {
12696
+ "epoch": 0.72,
12697
+ "grad_norm": 1.5866329669952393,
12698
+ "learning_rate": 1.412387454239636e-05,
12699
+ "loss": 2.4445,
12700
+ "step": 7252
12701
+ },
12702
+ {
12703
+ "epoch": 0.72,
12704
+ "grad_norm": 1.6858747005462646,
12705
+ "learning_rate": 1.4104086276837836e-05,
12706
+ "loss": 2.51,
12707
+ "step": 7256
12708
+ },
12709
+ {
12710
+ "epoch": 0.72,
12711
+ "grad_norm": 1.6350436210632324,
12712
+ "learning_rate": 1.4084298011279312e-05,
12713
+ "loss": 2.6297,
12714
+ "step": 7260
12715
+ },
12716
+ {
12717
+ "epoch": 0.72,
12718
+ "grad_norm": 1.7628724575042725,
12719
+ "learning_rate": 1.4064509745720788e-05,
12720
+ "loss": 2.6294,
12721
+ "step": 7264
12722
+ },
12723
+ {
12724
+ "epoch": 0.72,
12725
+ "grad_norm": 1.6731958389282227,
12726
+ "learning_rate": 1.4044721480162265e-05,
12727
+ "loss": 2.5577,
12728
+ "step": 7268
12729
+ },
12730
+ {
12731
+ "epoch": 0.72,
12732
+ "grad_norm": 1.7393099069595337,
12733
+ "learning_rate": 1.4024933214603741e-05,
12734
+ "loss": 2.6079,
12735
+ "step": 7272
12736
+ },
12737
+ {
12738
+ "epoch": 0.72,
12739
+ "grad_norm": 1.7639967203140259,
12740
+ "learning_rate": 1.4005144949045217e-05,
12741
+ "loss": 2.7389,
12742
+ "step": 7276
12743
+ },
12744
+ {
12745
+ "epoch": 0.72,
12746
+ "grad_norm": 1.5961958169937134,
12747
+ "learning_rate": 1.3985356683486692e-05,
12748
+ "loss": 2.5846,
12749
+ "step": 7280
12750
+ },
12751
+ {
12752
+ "epoch": 0.72,
12753
+ "grad_norm": 1.6526871919631958,
12754
+ "learning_rate": 1.3965568417928168e-05,
12755
+ "loss": 2.5965,
12756
+ "step": 7284
12757
+ },
12758
+ {
12759
+ "epoch": 0.72,
12760
+ "grad_norm": 1.7771737575531006,
12761
+ "learning_rate": 1.3945780152369645e-05,
12762
+ "loss": 2.5905,
12763
+ "step": 7288
12764
+ },
12765
+ {
12766
+ "epoch": 0.72,
12767
+ "grad_norm": 1.713395118713379,
12768
+ "learning_rate": 1.3925991886811121e-05,
12769
+ "loss": 2.5063,
12770
+ "step": 7292
12771
+ },
12772
+ {
12773
+ "epoch": 0.72,
12774
+ "grad_norm": 1.5294148921966553,
12775
+ "learning_rate": 1.3906203621252597e-05,
12776
+ "loss": 2.4081,
12777
+ "step": 7296
12778
+ },
12779
+ {
12780
+ "epoch": 0.72,
12781
+ "grad_norm": 1.6980494260787964,
12782
+ "learning_rate": 1.3886415355694074e-05,
12783
+ "loss": 2.4693,
12784
+ "step": 7300
12785
+ },
12786
+ {
12787
+ "epoch": 0.72,
12788
+ "grad_norm": 1.703873634338379,
12789
+ "learning_rate": 1.386662709013555e-05,
12790
+ "loss": 2.4529,
12791
+ "step": 7304
12792
+ },
12793
+ {
12794
+ "epoch": 0.72,
12795
+ "grad_norm": 1.5093457698822021,
12796
+ "learning_rate": 1.3846838824577026e-05,
12797
+ "loss": 2.439,
12798
+ "step": 7308
12799
+ },
12800
+ {
12801
+ "epoch": 0.72,
12802
+ "grad_norm": 1.6407102346420288,
12803
+ "learning_rate": 1.3827050559018503e-05,
12804
+ "loss": 2.4581,
12805
+ "step": 7312
12806
+ },
12807
+ {
12808
+ "epoch": 0.72,
12809
+ "grad_norm": 1.485217809677124,
12810
+ "learning_rate": 1.380726229345998e-05,
12811
+ "loss": 2.5162,
12812
+ "step": 7316
12813
+ },
12814
+ {
12815
+ "epoch": 0.72,
12816
+ "grad_norm": 1.5687867403030396,
12817
+ "learning_rate": 1.3787474027901457e-05,
12818
+ "loss": 2.4868,
12819
+ "step": 7320
12820
+ },
12821
+ {
12822
+ "epoch": 0.72,
12823
+ "grad_norm": 1.5972037315368652,
12824
+ "learning_rate": 1.376768576234293e-05,
12825
+ "loss": 2.5713,
12826
+ "step": 7324
12827
+ },
12828
+ {
12829
+ "epoch": 0.73,
12830
+ "grad_norm": 1.6357609033584595,
12831
+ "learning_rate": 1.3747897496784406e-05,
12832
+ "loss": 2.5051,
12833
+ "step": 7328
12834
+ },
12835
+ {
12836
+ "epoch": 0.73,
12837
+ "grad_norm": 1.7274508476257324,
12838
+ "learning_rate": 1.3728109231225883e-05,
12839
+ "loss": 2.596,
12840
+ "step": 7332
12841
+ },
12842
+ {
12843
+ "epoch": 0.73,
12844
+ "grad_norm": 1.9209715127944946,
12845
+ "learning_rate": 1.3708320965667359e-05,
12846
+ "loss": 2.7628,
12847
+ "step": 7336
12848
+ },
12849
+ {
12850
+ "epoch": 0.73,
12851
+ "grad_norm": 1.5816470384597778,
12852
+ "learning_rate": 1.3688532700108835e-05,
12853
+ "loss": 2.4371,
12854
+ "step": 7340
12855
+ },
12856
+ {
12857
+ "epoch": 0.73,
12858
+ "grad_norm": 1.6190577745437622,
12859
+ "learning_rate": 1.3668744434550313e-05,
12860
+ "loss": 2.4256,
12861
+ "step": 7344
12862
+ },
12863
+ {
12864
+ "epoch": 0.73,
12865
+ "grad_norm": 1.627508282661438,
12866
+ "learning_rate": 1.364895616899179e-05,
12867
+ "loss": 2.6653,
12868
+ "step": 7348
12869
+ },
12870
+ {
12871
+ "epoch": 0.73,
12872
+ "grad_norm": 1.6422009468078613,
12873
+ "learning_rate": 1.3629167903433266e-05,
12874
+ "loss": 2.6699,
12875
+ "step": 7352
12876
+ },
12877
+ {
12878
+ "epoch": 0.73,
12879
+ "grad_norm": 1.546067476272583,
12880
+ "learning_rate": 1.3609379637874742e-05,
12881
+ "loss": 2.4263,
12882
+ "step": 7356
12883
+ },
12884
+ {
12885
+ "epoch": 0.73,
12886
+ "grad_norm": 1.621297836303711,
12887
+ "learning_rate": 1.3589591372316218e-05,
12888
+ "loss": 2.4296,
12889
+ "step": 7360
12890
+ },
12891
+ {
12892
+ "epoch": 0.73,
12893
+ "grad_norm": 1.643576741218567,
12894
+ "learning_rate": 1.3569803106757691e-05,
12895
+ "loss": 2.5806,
12896
+ "step": 7364
12897
+ },
12898
+ {
12899
+ "epoch": 0.73,
12900
+ "grad_norm": 1.6136977672576904,
12901
+ "learning_rate": 1.3550014841199168e-05,
12902
+ "loss": 2.4887,
12903
+ "step": 7368
12904
+ },
12905
+ {
12906
+ "epoch": 0.73,
12907
+ "grad_norm": 1.704002857208252,
12908
+ "learning_rate": 1.3530226575640644e-05,
12909
+ "loss": 2.583,
12910
+ "step": 7372
12911
+ },
12912
+ {
12913
+ "epoch": 0.73,
12914
+ "grad_norm": 1.6694648265838623,
12915
+ "learning_rate": 1.3510438310082122e-05,
12916
+ "loss": 2.4127,
12917
+ "step": 7376
12918
+ },
12919
+ {
12920
+ "epoch": 0.73,
12921
+ "grad_norm": 1.6585307121276855,
12922
+ "learning_rate": 1.3490650044523598e-05,
12923
+ "loss": 2.4128,
12924
+ "step": 7380
12925
+ },
12926
+ {
12927
+ "epoch": 0.73,
12928
+ "grad_norm": 1.7365046739578247,
12929
+ "learning_rate": 1.3470861778965075e-05,
12930
+ "loss": 2.6269,
12931
+ "step": 7384
12932
+ },
12933
+ {
12934
+ "epoch": 0.73,
12935
+ "grad_norm": 1.6612383127212524,
12936
+ "learning_rate": 1.3451073513406551e-05,
12937
+ "loss": 2.5896,
12938
+ "step": 7388
12939
+ },
12940
+ {
12941
+ "epoch": 0.73,
12942
+ "grad_norm": 1.7151292562484741,
12943
+ "learning_rate": 1.3431285247848027e-05,
12944
+ "loss": 2.4705,
12945
+ "step": 7392
12946
+ },
12947
+ {
12948
+ "epoch": 0.73,
12949
+ "grad_norm": 1.608148455619812,
12950
+ "learning_rate": 1.3411496982289504e-05,
12951
+ "loss": 2.4956,
12952
+ "step": 7396
12953
+ },
12954
+ {
12955
+ "epoch": 0.73,
12956
+ "grad_norm": 1.9409472942352295,
12957
+ "learning_rate": 1.339170871673098e-05,
12958
+ "loss": 2.5163,
12959
+ "step": 7400
12960
+ },
12961
+ {
12962
+ "epoch": 0.73,
12963
+ "grad_norm": 1.6790823936462402,
12964
+ "learning_rate": 1.3371920451172456e-05,
12965
+ "loss": 2.5319,
12966
+ "step": 7404
12967
+ },
12968
+ {
12969
+ "epoch": 0.73,
12970
+ "grad_norm": 1.833620548248291,
12971
+ "learning_rate": 1.3352132185613931e-05,
12972
+ "loss": 2.5467,
12973
+ "step": 7408
12974
+ },
12975
+ {
12976
+ "epoch": 0.73,
12977
+ "grad_norm": 1.7631961107254028,
12978
+ "learning_rate": 1.3332343920055407e-05,
12979
+ "loss": 2.5748,
12980
+ "step": 7412
12981
+ },
12982
+ {
12983
+ "epoch": 0.73,
12984
+ "grad_norm": 1.5768530368804932,
12985
+ "learning_rate": 1.3312555654496884e-05,
12986
+ "loss": 2.4908,
12987
+ "step": 7416
12988
+ },
12989
+ {
12990
+ "epoch": 0.73,
12991
+ "grad_norm": 1.6658759117126465,
12992
+ "learning_rate": 1.329276738893836e-05,
12993
+ "loss": 2.5698,
12994
+ "step": 7420
12995
+ },
12996
+ {
12997
+ "epoch": 0.73,
12998
+ "grad_norm": 1.7477085590362549,
12999
+ "learning_rate": 1.3272979123379836e-05,
13000
+ "loss": 2.3864,
13001
+ "step": 7424
13002
+ },
13003
+ {
13004
+ "epoch": 0.73,
13005
+ "grad_norm": 1.6787872314453125,
13006
+ "learning_rate": 1.3253190857821313e-05,
13007
+ "loss": 2.5065,
13008
+ "step": 7428
13009
+ },
13010
+ {
13011
+ "epoch": 0.74,
13012
+ "grad_norm": 1.6128181219100952,
13013
+ "learning_rate": 1.3233402592262789e-05,
13014
+ "loss": 2.6635,
13015
+ "step": 7432
13016
+ },
13017
+ {
13018
+ "epoch": 0.74,
13019
+ "grad_norm": 1.4591010808944702,
13020
+ "learning_rate": 1.3213614326704265e-05,
13021
+ "loss": 2.4067,
13022
+ "step": 7436
13023
+ },
13024
+ {
13025
+ "epoch": 0.74,
13026
+ "grad_norm": 1.7754063606262207,
13027
+ "learning_rate": 1.3193826061145742e-05,
13028
+ "loss": 2.5308,
13029
+ "step": 7440
13030
+ },
13031
+ {
13032
+ "epoch": 0.74,
13033
+ "grad_norm": 1.708154559135437,
13034
+ "learning_rate": 1.3174037795587218e-05,
13035
+ "loss": 2.4604,
13036
+ "step": 7444
13037
+ },
13038
+ {
13039
+ "epoch": 0.74,
13040
+ "grad_norm": 1.539201021194458,
13041
+ "learning_rate": 1.3154249530028694e-05,
13042
+ "loss": 2.5372,
13043
+ "step": 7448
13044
+ },
13045
+ {
13046
+ "epoch": 0.74,
13047
+ "grad_norm": 1.8630115985870361,
13048
+ "learning_rate": 1.3134461264470169e-05,
13049
+ "loss": 2.5022,
13050
+ "step": 7452
13051
+ },
13052
+ {
13053
+ "epoch": 0.74,
13054
+ "grad_norm": 1.7771594524383545,
13055
+ "learning_rate": 1.3114672998911645e-05,
13056
+ "loss": 2.3826,
13057
+ "step": 7456
13058
+ },
13059
+ {
13060
+ "epoch": 0.74,
13061
+ "grad_norm": 1.8175123929977417,
13062
+ "learning_rate": 1.3094884733353121e-05,
13063
+ "loss": 2.5922,
13064
+ "step": 7460
13065
+ },
13066
+ {
13067
+ "epoch": 0.74,
13068
+ "grad_norm": 1.4767266511917114,
13069
+ "learning_rate": 1.3075096467794598e-05,
13070
+ "loss": 2.5334,
13071
+ "step": 7464
13072
+ },
13073
+ {
13074
+ "epoch": 0.74,
13075
+ "grad_norm": 1.7710050344467163,
13076
+ "learning_rate": 1.3055308202236074e-05,
13077
+ "loss": 2.5901,
13078
+ "step": 7468
13079
+ },
13080
+ {
13081
+ "epoch": 0.74,
13082
+ "grad_norm": 1.78278648853302,
13083
+ "learning_rate": 1.303551993667755e-05,
13084
+ "loss": 2.5621,
13085
+ "step": 7472
13086
+ },
13087
+ {
13088
+ "epoch": 0.74,
13089
+ "grad_norm": 1.553341269493103,
13090
+ "learning_rate": 1.3015731671119027e-05,
13091
+ "loss": 2.5296,
13092
+ "step": 7476
13093
+ },
13094
+ {
13095
+ "epoch": 0.74,
13096
+ "grad_norm": 1.4547873735427856,
13097
+ "learning_rate": 1.2995943405560505e-05,
13098
+ "loss": 2.5812,
13099
+ "step": 7480
13100
+ },
13101
+ {
13102
+ "epoch": 0.74,
13103
+ "grad_norm": 1.6034144163131714,
13104
+ "learning_rate": 1.2976155140001981e-05,
13105
+ "loss": 2.3507,
13106
+ "step": 7484
13107
+ },
13108
+ {
13109
+ "epoch": 0.74,
13110
+ "grad_norm": 1.6461652517318726,
13111
+ "learning_rate": 1.2956366874443457e-05,
13112
+ "loss": 2.6291,
13113
+ "step": 7488
13114
+ },
13115
+ {
13116
+ "epoch": 0.74,
13117
+ "grad_norm": 1.6804616451263428,
13118
+ "learning_rate": 1.293657860888493e-05,
13119
+ "loss": 2.4514,
13120
+ "step": 7492
13121
+ },
13122
+ {
13123
+ "epoch": 0.74,
13124
+ "grad_norm": 1.5887171030044556,
13125
+ "learning_rate": 1.2916790343326407e-05,
13126
+ "loss": 2.386,
13127
+ "step": 7496
13128
+ },
13129
+ {
13130
+ "epoch": 0.74,
13131
+ "grad_norm": 1.6213395595550537,
13132
+ "learning_rate": 1.2897002077767883e-05,
13133
+ "loss": 2.5569,
13134
+ "step": 7500
13135
+ },
13136
+ {
13137
+ "epoch": 0.74,
13138
+ "grad_norm": 1.7485177516937256,
13139
+ "learning_rate": 1.287721381220936e-05,
13140
+ "loss": 2.5454,
13141
+ "step": 7504
13142
+ },
13143
+ {
13144
+ "epoch": 0.74,
13145
+ "grad_norm": 1.660869836807251,
13146
+ "learning_rate": 1.2857425546650836e-05,
13147
+ "loss": 2.5177,
13148
+ "step": 7508
13149
+ },
13150
+ {
13151
+ "epoch": 0.74,
13152
+ "grad_norm": 1.674647331237793,
13153
+ "learning_rate": 1.2837637281092314e-05,
13154
+ "loss": 2.3855,
13155
+ "step": 7512
13156
+ },
13157
+ {
13158
+ "epoch": 0.74,
13159
+ "grad_norm": 2.053084373474121,
13160
+ "learning_rate": 1.281784901553379e-05,
13161
+ "loss": 2.7179,
13162
+ "step": 7516
13163
+ },
13164
+ {
13165
+ "epoch": 0.74,
13166
+ "grad_norm": 1.5778189897537231,
13167
+ "learning_rate": 1.2798060749975266e-05,
13168
+ "loss": 2.5341,
13169
+ "step": 7520
13170
+ },
13171
+ {
13172
+ "epoch": 0.74,
13173
+ "grad_norm": 1.670196294784546,
13174
+ "learning_rate": 1.2778272484416743e-05,
13175
+ "loss": 2.4186,
13176
+ "step": 7524
13177
+ },
13178
+ {
13179
+ "epoch": 0.74,
13180
+ "grad_norm": 1.7191470861434937,
13181
+ "learning_rate": 1.2758484218858219e-05,
13182
+ "loss": 2.4801,
13183
+ "step": 7528
13184
+ },
13185
+ {
13186
+ "epoch": 0.75,
13187
+ "grad_norm": 1.6857608556747437,
13188
+ "learning_rate": 1.2738695953299695e-05,
13189
+ "loss": 2.4867,
13190
+ "step": 7532
13191
+ },
13192
+ {
13193
+ "epoch": 0.75,
13194
+ "grad_norm": 1.6951642036437988,
13195
+ "learning_rate": 1.2718907687741168e-05,
13196
+ "loss": 2.5551,
13197
+ "step": 7536
13198
+ },
13199
+ {
13200
+ "epoch": 0.75,
13201
+ "grad_norm": 1.9836032390594482,
13202
+ "learning_rate": 1.2699119422182646e-05,
13203
+ "loss": 2.4202,
13204
+ "step": 7540
13205
+ },
13206
+ {
13207
+ "epoch": 0.75,
13208
+ "grad_norm": 1.6894361972808838,
13209
+ "learning_rate": 1.2679331156624122e-05,
13210
+ "loss": 2.5553,
13211
+ "step": 7544
13212
+ },
13213
+ {
13214
+ "epoch": 0.75,
13215
+ "grad_norm": 1.6373331546783447,
13216
+ "learning_rate": 1.2659542891065599e-05,
13217
+ "loss": 2.3549,
13218
+ "step": 7548
13219
+ },
13220
+ {
13221
+ "epoch": 0.75,
13222
+ "grad_norm": 1.5669854879379272,
13223
+ "learning_rate": 1.2639754625507075e-05,
13224
+ "loss": 2.4814,
13225
+ "step": 7552
13226
+ },
13227
+ {
13228
+ "epoch": 0.75,
13229
+ "grad_norm": 1.697510838508606,
13230
+ "learning_rate": 1.2619966359948551e-05,
13231
+ "loss": 2.5292,
13232
+ "step": 7556
13233
+ },
13234
+ {
13235
+ "epoch": 0.75,
13236
+ "grad_norm": 1.6051623821258545,
13237
+ "learning_rate": 1.2600178094390028e-05,
13238
+ "loss": 2.4635,
13239
+ "step": 7560
13240
+ },
13241
+ {
13242
+ "epoch": 0.75,
13243
+ "grad_norm": 1.550139307975769,
13244
+ "learning_rate": 1.2580389828831504e-05,
13245
+ "loss": 2.5052,
13246
+ "step": 7564
13247
+ },
13248
+ {
13249
+ "epoch": 0.75,
13250
+ "grad_norm": 1.8294285535812378,
13251
+ "learning_rate": 1.256060156327298e-05,
13252
+ "loss": 2.6251,
13253
+ "step": 7568
13254
+ },
13255
+ {
13256
+ "epoch": 0.75,
13257
+ "grad_norm": 1.731762170791626,
13258
+ "learning_rate": 1.2540813297714457e-05,
13259
+ "loss": 2.6812,
13260
+ "step": 7572
13261
+ },
13262
+ {
13263
+ "epoch": 0.75,
13264
+ "grad_norm": 1.7276403903961182,
13265
+ "learning_rate": 1.2521025032155931e-05,
13266
+ "loss": 2.5587,
13267
+ "step": 7576
13268
+ },
13269
+ {
13270
+ "epoch": 0.75,
13271
+ "grad_norm": 1.8602039813995361,
13272
+ "learning_rate": 1.2501236766597408e-05,
13273
+ "loss": 2.5649,
13274
+ "step": 7580
13275
+ },
13276
+ {
13277
+ "epoch": 0.75,
13278
+ "grad_norm": 1.5579040050506592,
13279
+ "learning_rate": 1.2481448501038886e-05,
13280
+ "loss": 2.6513,
13281
+ "step": 7584
13282
+ },
13283
+ {
13284
+ "epoch": 0.75,
13285
+ "grad_norm": 1.7131482362747192,
13286
+ "learning_rate": 1.246166023548036e-05,
13287
+ "loss": 2.4914,
13288
+ "step": 7588
13289
+ },
13290
+ {
13291
+ "epoch": 0.75,
13292
+ "grad_norm": 1.68119215965271,
13293
+ "learning_rate": 1.2441871969921837e-05,
13294
+ "loss": 2.5319,
13295
+ "step": 7592
13296
+ },
13297
+ {
13298
+ "epoch": 0.75,
13299
+ "grad_norm": 1.8896154165267944,
13300
+ "learning_rate": 1.2422083704363313e-05,
13301
+ "loss": 2.5683,
13302
+ "step": 7596
13303
+ },
13304
+ {
13305
+ "epoch": 0.75,
13306
+ "grad_norm": 1.5681391954421997,
13307
+ "learning_rate": 1.240229543880479e-05,
13308
+ "loss": 2.4047,
13309
+ "step": 7600
13310
+ },
13311
+ {
13312
+ "epoch": 0.75,
13313
+ "grad_norm": 1.4728906154632568,
13314
+ "learning_rate": 1.2382507173246266e-05,
13315
+ "loss": 2.5071,
13316
+ "step": 7604
13317
+ },
13318
+ {
13319
+ "epoch": 0.75,
13320
+ "grad_norm": 1.5875272750854492,
13321
+ "learning_rate": 1.2362718907687742e-05,
13322
+ "loss": 2.5415,
13323
+ "step": 7608
13324
+ },
13325
+ {
13326
+ "epoch": 0.75,
13327
+ "grad_norm": 1.6239516735076904,
13328
+ "learning_rate": 1.2342930642129218e-05,
13329
+ "loss": 2.4362,
13330
+ "step": 7612
13331
+ },
13332
+ {
13333
+ "epoch": 0.75,
13334
+ "grad_norm": 1.812610149383545,
13335
+ "learning_rate": 1.2323142376570695e-05,
13336
+ "loss": 2.5034,
13337
+ "step": 7616
13338
+ },
13339
+ {
13340
+ "epoch": 0.75,
13341
+ "grad_norm": 1.6190868616104126,
13342
+ "learning_rate": 1.2303354111012171e-05,
13343
+ "loss": 2.4786,
13344
+ "step": 7620
13345
+ },
13346
+ {
13347
+ "epoch": 0.75,
13348
+ "grad_norm": 1.6609690189361572,
13349
+ "learning_rate": 1.2283565845453647e-05,
13350
+ "loss": 2.372,
13351
+ "step": 7624
13352
+ },
13353
+ {
13354
+ "epoch": 0.75,
13355
+ "grad_norm": 1.700492024421692,
13356
+ "learning_rate": 1.2263777579895122e-05,
13357
+ "loss": 2.4527,
13358
+ "step": 7628
13359
+ },
13360
+ {
13361
+ "epoch": 0.76,
13362
+ "grad_norm": 1.8237019777297974,
13363
+ "learning_rate": 1.2243989314336598e-05,
13364
+ "loss": 2.548,
13365
+ "step": 7632
13366
+ },
13367
+ {
13368
+ "epoch": 0.76,
13369
+ "grad_norm": 1.558048129081726,
13370
+ "learning_rate": 1.2224201048778074e-05,
13371
+ "loss": 2.5439,
13372
+ "step": 7636
13373
+ },
13374
+ {
13375
+ "epoch": 0.76,
13376
+ "grad_norm": 1.7001163959503174,
13377
+ "learning_rate": 1.220441278321955e-05,
13378
+ "loss": 2.5146,
13379
+ "step": 7640
13380
+ },
13381
+ {
13382
+ "epoch": 0.76,
13383
+ "grad_norm": 1.5829248428344727,
13384
+ "learning_rate": 1.2184624517661027e-05,
13385
+ "loss": 2.5473,
13386
+ "step": 7644
13387
+ },
13388
+ {
13389
+ "epoch": 0.76,
13390
+ "grad_norm": 1.5922554731369019,
13391
+ "learning_rate": 1.2164836252102505e-05,
13392
+ "loss": 2.5482,
13393
+ "step": 7648
13394
+ },
13395
+ {
13396
+ "epoch": 0.76,
13397
+ "grad_norm": 1.5552897453308105,
13398
+ "learning_rate": 1.214504798654398e-05,
13399
+ "loss": 2.7146,
13400
+ "step": 7652
13401
+ },
13402
+ {
13403
+ "epoch": 0.76,
13404
+ "grad_norm": 1.6641972064971924,
13405
+ "learning_rate": 1.2125259720985456e-05,
13406
+ "loss": 2.5317,
13407
+ "step": 7656
13408
+ },
13409
+ {
13410
+ "epoch": 0.76,
13411
+ "grad_norm": 1.727538824081421,
13412
+ "learning_rate": 1.2105471455426932e-05,
13413
+ "loss": 2.511,
13414
+ "step": 7660
13415
+ },
13416
+ {
13417
+ "epoch": 0.76,
13418
+ "grad_norm": 1.7466334104537964,
13419
+ "learning_rate": 1.2085683189868409e-05,
13420
+ "loss": 2.6592,
13421
+ "step": 7664
13422
+ },
13423
+ {
13424
+ "epoch": 0.76,
13425
+ "grad_norm": 1.578805923461914,
13426
+ "learning_rate": 1.2065894924309885e-05,
13427
+ "loss": 2.5405,
13428
+ "step": 7668
13429
+ },
13430
+ {
13431
+ "epoch": 0.76,
13432
+ "grad_norm": 1.7704498767852783,
13433
+ "learning_rate": 1.204610665875136e-05,
13434
+ "loss": 2.7104,
13435
+ "step": 7672
13436
+ },
13437
+ {
13438
+ "epoch": 0.76,
13439
+ "grad_norm": 1.901253342628479,
13440
+ "learning_rate": 1.2026318393192838e-05,
13441
+ "loss": 2.68,
13442
+ "step": 7676
13443
+ },
13444
+ {
13445
+ "epoch": 0.76,
13446
+ "grad_norm": 1.6251075267791748,
13447
+ "learning_rate": 1.2006530127634314e-05,
13448
+ "loss": 2.5327,
13449
+ "step": 7680
13450
+ },
13451
+ {
13452
+ "epoch": 0.76,
13453
+ "grad_norm": 1.9433711767196655,
13454
+ "learning_rate": 1.198674186207579e-05,
13455
+ "loss": 2.664,
13456
+ "step": 7684
13457
+ },
13458
+ {
13459
+ "epoch": 0.76,
13460
+ "grad_norm": 1.6232560873031616,
13461
+ "learning_rate": 1.1966953596517267e-05,
13462
+ "loss": 2.3881,
13463
+ "step": 7688
13464
+ },
13465
+ {
13466
+ "epoch": 0.76,
13467
+ "grad_norm": 1.625562310218811,
13468
+ "learning_rate": 1.1947165330958741e-05,
13469
+ "loss": 2.3981,
13470
+ "step": 7692
13471
+ },
13472
+ {
13473
+ "epoch": 0.76,
13474
+ "grad_norm": 1.8203229904174805,
13475
+ "learning_rate": 1.1927377065400218e-05,
13476
+ "loss": 2.5587,
13477
+ "step": 7696
13478
+ },
13479
+ {
13480
+ "epoch": 0.76,
13481
+ "grad_norm": 1.5125036239624023,
13482
+ "learning_rate": 1.1907588799841694e-05,
13483
+ "loss": 2.548,
13484
+ "step": 7700
13485
+ },
13486
+ {
13487
+ "epoch": 0.76,
13488
+ "grad_norm": 1.7927870750427246,
13489
+ "learning_rate": 1.188780053428317e-05,
13490
+ "loss": 2.5317,
13491
+ "step": 7704
13492
+ },
13493
+ {
13494
+ "epoch": 0.76,
13495
+ "grad_norm": 1.853130578994751,
13496
+ "learning_rate": 1.1868012268724647e-05,
13497
+ "loss": 2.5032,
13498
+ "step": 7708
13499
+ },
13500
+ {
13501
+ "epoch": 0.76,
13502
+ "grad_norm": 1.8347195386886597,
13503
+ "learning_rate": 1.1848224003166123e-05,
13504
+ "loss": 2.3406,
13505
+ "step": 7712
13506
+ },
13507
+ {
13508
+ "epoch": 0.76,
13509
+ "grad_norm": 1.6292403936386108,
13510
+ "learning_rate": 1.18284357376076e-05,
13511
+ "loss": 2.4874,
13512
+ "step": 7716
13513
+ },
13514
+ {
13515
+ "epoch": 0.76,
13516
+ "grad_norm": 1.6553095579147339,
13517
+ "learning_rate": 1.1808647472049076e-05,
13518
+ "loss": 2.5065,
13519
+ "step": 7720
13520
+ },
13521
+ {
13522
+ "epoch": 0.76,
13523
+ "grad_norm": 1.9831068515777588,
13524
+ "learning_rate": 1.1788859206490552e-05,
13525
+ "loss": 2.5079,
13526
+ "step": 7724
13527
+ },
13528
+ {
13529
+ "epoch": 0.76,
13530
+ "grad_norm": 1.7396299839019775,
13531
+ "learning_rate": 1.1769070940932028e-05,
13532
+ "loss": 2.5171,
13533
+ "step": 7728
13534
+ },
13535
+ {
13536
+ "epoch": 0.77,
13537
+ "grad_norm": 1.8842509984970093,
13538
+ "learning_rate": 1.1749282675373505e-05,
13539
+ "loss": 2.6561,
13540
+ "step": 7732
13541
+ },
13542
+ {
13543
+ "epoch": 0.77,
13544
+ "grad_norm": 1.5185445547103882,
13545
+ "learning_rate": 1.1729494409814979e-05,
13546
+ "loss": 2.6638,
13547
+ "step": 7736
13548
+ },
13549
+ {
13550
+ "epoch": 0.77,
13551
+ "grad_norm": 1.7892464399337769,
13552
+ "learning_rate": 1.1709706144256455e-05,
13553
+ "loss": 2.4509,
13554
+ "step": 7740
13555
+ },
13556
+ {
13557
+ "epoch": 0.77,
13558
+ "grad_norm": 1.6770362854003906,
13559
+ "learning_rate": 1.1689917878697933e-05,
13560
+ "loss": 2.536,
13561
+ "step": 7744
13562
+ },
13563
+ {
13564
+ "epoch": 0.77,
13565
+ "grad_norm": 1.6587915420532227,
13566
+ "learning_rate": 1.167012961313941e-05,
13567
+ "loss": 2.515,
13568
+ "step": 7748
13569
+ },
13570
+ {
13571
+ "epoch": 0.77,
13572
+ "grad_norm": 1.7976752519607544,
13573
+ "learning_rate": 1.1650341347580886e-05,
13574
+ "loss": 2.4524,
13575
+ "step": 7752
13576
+ },
13577
+ {
13578
+ "epoch": 0.77,
13579
+ "grad_norm": 1.7145442962646484,
13580
+ "learning_rate": 1.163055308202236e-05,
13581
+ "loss": 2.56,
13582
+ "step": 7756
13583
+ },
13584
+ {
13585
+ "epoch": 0.77,
13586
+ "grad_norm": 1.6737264394760132,
13587
+ "learning_rate": 1.1610764816463837e-05,
13588
+ "loss": 2.45,
13589
+ "step": 7760
13590
+ },
13591
+ {
13592
+ "epoch": 0.77,
13593
+ "grad_norm": 1.683347225189209,
13594
+ "learning_rate": 1.1590976550905313e-05,
13595
+ "loss": 2.4508,
13596
+ "step": 7764
13597
+ },
13598
+ {
13599
+ "epoch": 0.77,
13600
+ "grad_norm": 1.6320801973342896,
13601
+ "learning_rate": 1.157118828534679e-05,
13602
+ "loss": 2.6438,
13603
+ "step": 7768
13604
+ },
13605
+ {
13606
+ "epoch": 0.77,
13607
+ "grad_norm": 1.8501825332641602,
13608
+ "learning_rate": 1.1551400019788266e-05,
13609
+ "loss": 2.4262,
13610
+ "step": 7772
13611
+ },
13612
+ {
13613
+ "epoch": 0.77,
13614
+ "grad_norm": 1.8582115173339844,
13615
+ "learning_rate": 1.1531611754229742e-05,
13616
+ "loss": 2.4314,
13617
+ "step": 7776
13618
+ },
13619
+ {
13620
+ "epoch": 0.77,
13621
+ "grad_norm": 1.5969737768173218,
13622
+ "learning_rate": 1.1511823488671219e-05,
13623
+ "loss": 2.4917,
13624
+ "step": 7780
13625
+ },
13626
+ {
13627
+ "epoch": 0.77,
13628
+ "grad_norm": 1.5447652339935303,
13629
+ "learning_rate": 1.1492035223112695e-05,
13630
+ "loss": 2.5134,
13631
+ "step": 7784
13632
+ },
13633
+ {
13634
+ "epoch": 0.77,
13635
+ "grad_norm": 1.7579408884048462,
13636
+ "learning_rate": 1.1472246957554171e-05,
13637
+ "loss": 2.4526,
13638
+ "step": 7788
13639
+ },
13640
+ {
13641
+ "epoch": 0.77,
13642
+ "grad_norm": 1.6958955526351929,
13643
+ "learning_rate": 1.1452458691995648e-05,
13644
+ "loss": 2.5719,
13645
+ "step": 7792
13646
+ },
13647
+ {
13648
+ "epoch": 0.77,
13649
+ "grad_norm": 1.6478573083877563,
13650
+ "learning_rate": 1.1432670426437124e-05,
13651
+ "loss": 2.5227,
13652
+ "step": 7796
13653
+ },
13654
+ {
13655
+ "epoch": 0.77,
13656
+ "grad_norm": 1.5438200235366821,
13657
+ "learning_rate": 1.1412882160878599e-05,
13658
+ "loss": 2.3835,
13659
+ "step": 7800
13660
+ },
13661
+ {
13662
+ "epoch": 0.77,
13663
+ "grad_norm": 1.5718754529953003,
13664
+ "learning_rate": 1.1393093895320075e-05,
13665
+ "loss": 2.4568,
13666
+ "step": 7804
13667
+ },
13668
+ {
13669
+ "epoch": 0.77,
13670
+ "grad_norm": 1.6531394720077515,
13671
+ "learning_rate": 1.1373305629761551e-05,
13672
+ "loss": 2.3707,
13673
+ "step": 7808
13674
+ },
13675
+ {
13676
+ "epoch": 0.77,
13677
+ "grad_norm": 1.8568472862243652,
13678
+ "learning_rate": 1.135351736420303e-05,
13679
+ "loss": 2.583,
13680
+ "step": 7812
13681
+ },
13682
+ {
13683
+ "epoch": 0.77,
13684
+ "grad_norm": 1.7812650203704834,
13685
+ "learning_rate": 1.1333729098644506e-05,
13686
+ "loss": 2.5865,
13687
+ "step": 7816
13688
+ },
13689
+ {
13690
+ "epoch": 0.77,
13691
+ "grad_norm": 1.7314536571502686,
13692
+ "learning_rate": 1.131394083308598e-05,
13693
+ "loss": 2.5384,
13694
+ "step": 7820
13695
+ },
13696
+ {
13697
+ "epoch": 0.77,
13698
+ "grad_norm": 1.8775840997695923,
13699
+ "learning_rate": 1.1294152567527457e-05,
13700
+ "loss": 2.6276,
13701
+ "step": 7824
13702
+ },
13703
+ {
13704
+ "epoch": 0.77,
13705
+ "grad_norm": 1.3948659896850586,
13706
+ "learning_rate": 1.1274364301968933e-05,
13707
+ "loss": 2.5302,
13708
+ "step": 7828
13709
+ },
13710
+ {
13711
+ "epoch": 0.77,
13712
+ "grad_norm": 1.690691590309143,
13713
+ "learning_rate": 1.125457603641041e-05,
13714
+ "loss": 2.5569,
13715
+ "step": 7832
13716
+ },
13717
+ {
13718
+ "epoch": 0.78,
13719
+ "grad_norm": 1.606754183769226,
13720
+ "learning_rate": 1.1234787770851885e-05,
13721
+ "loss": 2.3866,
13722
+ "step": 7836
13723
+ },
13724
+ {
13725
+ "epoch": 0.78,
13726
+ "grad_norm": 1.5848886966705322,
13727
+ "learning_rate": 1.1214999505293362e-05,
13728
+ "loss": 2.5407,
13729
+ "step": 7840
13730
+ },
13731
+ {
13732
+ "epoch": 0.78,
13733
+ "grad_norm": 1.5651729106903076,
13734
+ "learning_rate": 1.1195211239734838e-05,
13735
+ "loss": 2.2891,
13736
+ "step": 7844
13737
+ },
13738
+ {
13739
+ "epoch": 0.78,
13740
+ "grad_norm": 1.704177975654602,
13741
+ "learning_rate": 1.1175422974176314e-05,
13742
+ "loss": 2.6963,
13743
+ "step": 7848
13744
+ },
13745
+ {
13746
+ "epoch": 0.78,
13747
+ "grad_norm": 1.605334997177124,
13748
+ "learning_rate": 1.115563470861779e-05,
13749
+ "loss": 2.5576,
13750
+ "step": 7852
13751
+ },
13752
+ {
13753
+ "epoch": 0.78,
13754
+ "grad_norm": 1.530823826789856,
13755
+ "learning_rate": 1.1135846443059267e-05,
13756
+ "loss": 2.4859,
13757
+ "step": 7856
13758
+ },
13759
+ {
13760
+ "epoch": 0.78,
13761
+ "grad_norm": 1.6835219860076904,
13762
+ "learning_rate": 1.1116058177500742e-05,
13763
+ "loss": 2.462,
13764
+ "step": 7860
13765
+ },
13766
+ {
13767
+ "epoch": 0.78,
13768
+ "grad_norm": 1.6420457363128662,
13769
+ "learning_rate": 1.1096269911942218e-05,
13770
+ "loss": 2.6175,
13771
+ "step": 7864
13772
+ },
13773
+ {
13774
+ "epoch": 0.78,
13775
+ "grad_norm": 1.6330996751785278,
13776
+ "learning_rate": 1.1076481646383694e-05,
13777
+ "loss": 2.6308,
13778
+ "step": 7868
13779
+ },
13780
+ {
13781
+ "epoch": 0.78,
13782
+ "grad_norm": 2.031244993209839,
13783
+ "learning_rate": 1.105669338082517e-05,
13784
+ "loss": 2.4599,
13785
+ "step": 7872
13786
+ },
13787
+ {
13788
+ "epoch": 0.78,
13789
+ "grad_norm": 1.5154794454574585,
13790
+ "learning_rate": 1.1036905115266647e-05,
13791
+ "loss": 2.4949,
13792
+ "step": 7876
13793
+ },
13794
+ {
13795
+ "epoch": 0.78,
13796
+ "grad_norm": 1.7397664785385132,
13797
+ "learning_rate": 1.1017116849708125e-05,
13798
+ "loss": 2.3962,
13799
+ "step": 7880
13800
+ },
13801
+ {
13802
+ "epoch": 0.78,
13803
+ "grad_norm": 1.7765430212020874,
13804
+ "learning_rate": 1.09973285841496e-05,
13805
+ "loss": 2.4214,
13806
+ "step": 7884
13807
+ },
13808
+ {
13809
+ "epoch": 0.78,
13810
+ "grad_norm": 1.6177083253860474,
13811
+ "learning_rate": 1.0977540318591076e-05,
13812
+ "loss": 2.5221,
13813
+ "step": 7888
13814
+ },
13815
+ {
13816
+ "epoch": 0.78,
13817
+ "grad_norm": 1.6060839891433716,
13818
+ "learning_rate": 1.0957752053032552e-05,
13819
+ "loss": 2.6571,
13820
+ "step": 7892
13821
+ },
13822
+ {
13823
+ "epoch": 0.78,
13824
+ "grad_norm": 1.6707481145858765,
13825
+ "learning_rate": 1.0937963787474029e-05,
13826
+ "loss": 2.373,
13827
+ "step": 7896
13828
+ },
13829
+ {
13830
+ "epoch": 0.78,
13831
+ "grad_norm": 1.8542253971099854,
13832
+ "learning_rate": 1.0918175521915505e-05,
13833
+ "loss": 2.4029,
13834
+ "step": 7900
13835
+ },
13836
+ {
13837
+ "epoch": 0.78,
13838
+ "grad_norm": 1.6408201456069946,
13839
+ "learning_rate": 1.089838725635698e-05,
13840
+ "loss": 2.4065,
13841
+ "step": 7904
13842
+ },
13843
+ {
13844
+ "epoch": 0.78,
13845
+ "grad_norm": 1.5829615592956543,
13846
+ "learning_rate": 1.0878598990798458e-05,
13847
+ "loss": 2.6859,
13848
+ "step": 7908
13849
+ },
13850
+ {
13851
+ "epoch": 0.78,
13852
+ "grad_norm": 1.5252026319503784,
13853
+ "learning_rate": 1.0858810725239934e-05,
13854
+ "loss": 2.5326,
13855
+ "step": 7912
13856
+ },
13857
+ {
13858
+ "epoch": 0.78,
13859
+ "grad_norm": 1.5747402906417847,
13860
+ "learning_rate": 1.083902245968141e-05,
13861
+ "loss": 2.5311,
13862
+ "step": 7916
13863
+ },
13864
+ {
13865
+ "epoch": 0.78,
13866
+ "grad_norm": 1.592838168144226,
13867
+ "learning_rate": 1.0819234194122887e-05,
13868
+ "loss": 2.3582,
13869
+ "step": 7920
13870
+ },
13871
+ {
13872
+ "epoch": 0.78,
13873
+ "grad_norm": 1.5652263164520264,
13874
+ "learning_rate": 1.0799445928564361e-05,
13875
+ "loss": 2.5613,
13876
+ "step": 7924
13877
+ },
13878
+ {
13879
+ "epoch": 0.78,
13880
+ "grad_norm": 1.5245355367660522,
13881
+ "learning_rate": 1.0779657663005837e-05,
13882
+ "loss": 2.7002,
13883
+ "step": 7928
13884
+ },
13885
+ {
13886
+ "epoch": 0.78,
13887
+ "grad_norm": 1.724204421043396,
13888
+ "learning_rate": 1.0759869397447314e-05,
13889
+ "loss": 2.9039,
13890
+ "step": 7932
13891
+ },
13892
+ {
13893
+ "epoch": 0.79,
13894
+ "grad_norm": 1.7106647491455078,
13895
+ "learning_rate": 1.074008113188879e-05,
13896
+ "loss": 2.5086,
13897
+ "step": 7936
13898
+ },
13899
+ {
13900
+ "epoch": 0.79,
13901
+ "grad_norm": 1.5160934925079346,
13902
+ "learning_rate": 1.0720292866330266e-05,
13903
+ "loss": 2.447,
13904
+ "step": 7940
13905
+ },
13906
+ {
13907
+ "epoch": 0.79,
13908
+ "grad_norm": 1.833022952079773,
13909
+ "learning_rate": 1.0700504600771743e-05,
13910
+ "loss": 2.4753,
13911
+ "step": 7944
13912
+ },
13913
+ {
13914
+ "epoch": 0.79,
13915
+ "grad_norm": 1.7385578155517578,
13916
+ "learning_rate": 1.0680716335213219e-05,
13917
+ "loss": 2.6183,
13918
+ "step": 7948
13919
+ },
13920
+ {
13921
+ "epoch": 0.79,
13922
+ "grad_norm": 1.8667320013046265,
13923
+ "learning_rate": 1.0660928069654695e-05,
13924
+ "loss": 2.5605,
13925
+ "step": 7952
13926
+ },
13927
+ {
13928
+ "epoch": 0.79,
13929
+ "grad_norm": 1.5835785865783691,
13930
+ "learning_rate": 1.0641139804096172e-05,
13931
+ "loss": 2.4019,
13932
+ "step": 7956
13933
+ },
13934
+ {
13935
+ "epoch": 0.79,
13936
+ "grad_norm": 1.7108229398727417,
13937
+ "learning_rate": 1.0621351538537648e-05,
13938
+ "loss": 2.708,
13939
+ "step": 7960
13940
+ },
13941
+ {
13942
+ "epoch": 0.79,
13943
+ "grad_norm": 1.674804925918579,
13944
+ "learning_rate": 1.0601563272979124e-05,
13945
+ "loss": 2.5102,
13946
+ "step": 7964
13947
+ },
13948
+ {
13949
+ "epoch": 0.79,
13950
+ "grad_norm": 1.955705165863037,
13951
+ "learning_rate": 1.0581775007420599e-05,
13952
+ "loss": 2.5221,
13953
+ "step": 7968
13954
+ },
13955
+ {
13956
+ "epoch": 0.79,
13957
+ "grad_norm": 1.740157961845398,
13958
+ "learning_rate": 1.0561986741862075e-05,
13959
+ "loss": 2.5232,
13960
+ "step": 7972
13961
+ },
13962
+ {
13963
+ "epoch": 0.79,
13964
+ "grad_norm": 1.6809014081954956,
13965
+ "learning_rate": 1.0542198476303553e-05,
13966
+ "loss": 2.3372,
13967
+ "step": 7976
13968
+ },
13969
+ {
13970
+ "epoch": 0.79,
13971
+ "grad_norm": 1.7483688592910767,
13972
+ "learning_rate": 1.052241021074503e-05,
13973
+ "loss": 2.4913,
13974
+ "step": 7980
13975
+ },
13976
+ {
13977
+ "epoch": 0.79,
13978
+ "grad_norm": 1.5279000997543335,
13979
+ "learning_rate": 1.0502621945186506e-05,
13980
+ "loss": 2.6386,
13981
+ "step": 7984
13982
+ },
13983
+ {
13984
+ "epoch": 0.79,
13985
+ "grad_norm": 1.7373197078704834,
13986
+ "learning_rate": 1.048283367962798e-05,
13987
+ "loss": 2.5823,
13988
+ "step": 7988
13989
+ },
13990
+ {
13991
+ "epoch": 0.79,
13992
+ "grad_norm": 1.6561895608901978,
13993
+ "learning_rate": 1.0463045414069457e-05,
13994
+ "loss": 2.4531,
13995
+ "step": 7992
13996
+ },
13997
+ {
13998
+ "epoch": 0.79,
13999
+ "grad_norm": 1.6324809789657593,
14000
+ "learning_rate": 1.0443257148510933e-05,
14001
+ "loss": 2.4943,
14002
+ "step": 7996
14003
+ },
14004
+ {
14005
+ "epoch": 0.79,
14006
+ "grad_norm": 1.4932702779769897,
14007
+ "learning_rate": 1.042346888295241e-05,
14008
+ "loss": 2.5745,
14009
+ "step": 8000
14010
+ },
14011
+ {
14012
+ "epoch": 0.79,
14013
+ "grad_norm": 1.6975860595703125,
14014
+ "learning_rate": 1.0403680617393886e-05,
14015
+ "loss": 2.5195,
14016
+ "step": 8004
14017
+ },
14018
+ {
14019
+ "epoch": 0.79,
14020
+ "grad_norm": 1.5818710327148438,
14021
+ "learning_rate": 1.0383892351835362e-05,
14022
+ "loss": 2.5146,
14023
+ "step": 8008
14024
+ },
14025
+ {
14026
+ "epoch": 0.79,
14027
+ "grad_norm": 1.6648439168930054,
14028
+ "learning_rate": 1.0364104086276839e-05,
14029
+ "loss": 2.5038,
14030
+ "step": 8012
14031
+ },
14032
+ {
14033
+ "epoch": 0.79,
14034
+ "grad_norm": 1.8664870262145996,
14035
+ "learning_rate": 1.0344315820718315e-05,
14036
+ "loss": 2.4395,
14037
+ "step": 8016
14038
+ },
14039
+ {
14040
+ "epoch": 0.79,
14041
+ "grad_norm": 1.6299059391021729,
14042
+ "learning_rate": 1.0324527555159791e-05,
14043
+ "loss": 2.632,
14044
+ "step": 8020
14045
+ },
14046
+ {
14047
+ "epoch": 0.79,
14048
+ "grad_norm": 1.6017898321151733,
14049
+ "learning_rate": 1.0304739289601268e-05,
14050
+ "loss": 2.523,
14051
+ "step": 8024
14052
+ },
14053
+ {
14054
+ "epoch": 0.79,
14055
+ "grad_norm": 1.8422410488128662,
14056
+ "learning_rate": 1.0284951024042744e-05,
14057
+ "loss": 2.5135,
14058
+ "step": 8028
14059
+ },
14060
+ {
14061
+ "epoch": 0.79,
14062
+ "grad_norm": 1.7916709184646606,
14063
+ "learning_rate": 1.0265162758484218e-05,
14064
+ "loss": 2.3953,
14065
+ "step": 8032
14066
+ },
14067
+ {
14068
+ "epoch": 0.8,
14069
+ "grad_norm": 1.6924244165420532,
14070
+ "learning_rate": 1.0245374492925695e-05,
14071
+ "loss": 2.5083,
14072
+ "step": 8036
14073
+ },
14074
+ {
14075
+ "epoch": 0.8,
14076
+ "grad_norm": 1.6407674551010132,
14077
+ "learning_rate": 1.0225586227367171e-05,
14078
+ "loss": 2.5489,
14079
+ "step": 8040
14080
+ },
14081
+ {
14082
+ "epoch": 0.8,
14083
+ "grad_norm": 2.751863718032837,
14084
+ "learning_rate": 1.0205797961808649e-05,
14085
+ "loss": 2.5538,
14086
+ "step": 8044
14087
+ },
14088
+ {
14089
+ "epoch": 0.8,
14090
+ "grad_norm": 1.8018790483474731,
14091
+ "learning_rate": 1.0186009696250125e-05,
14092
+ "loss": 2.4931,
14093
+ "step": 8048
14094
+ },
14095
+ {
14096
+ "epoch": 0.8,
14097
+ "grad_norm": 1.6749464273452759,
14098
+ "learning_rate": 1.01662214306916e-05,
14099
+ "loss": 2.5116,
14100
+ "step": 8052
14101
+ },
14102
+ {
14103
+ "epoch": 0.8,
14104
+ "grad_norm": 1.8620166778564453,
14105
+ "learning_rate": 1.0146433165133076e-05,
14106
+ "loss": 2.4857,
14107
+ "step": 8056
14108
+ },
14109
+ {
14110
+ "epoch": 0.8,
14111
+ "grad_norm": 1.8505146503448486,
14112
+ "learning_rate": 1.0126644899574553e-05,
14113
+ "loss": 2.535,
14114
+ "step": 8060
14115
+ },
14116
+ {
14117
+ "epoch": 0.8,
14118
+ "grad_norm": 1.4946340322494507,
14119
+ "learning_rate": 1.0106856634016029e-05,
14120
+ "loss": 2.3432,
14121
+ "step": 8064
14122
+ },
14123
+ {
14124
+ "epoch": 0.8,
14125
+ "grad_norm": 1.8005412817001343,
14126
+ "learning_rate": 1.0087068368457505e-05,
14127
+ "loss": 2.4651,
14128
+ "step": 8068
14129
+ },
14130
+ {
14131
+ "epoch": 0.8,
14132
+ "grad_norm": 1.786773443222046,
14133
+ "learning_rate": 1.006728010289898e-05,
14134
+ "loss": 2.4242,
14135
+ "step": 8072
14136
+ },
14137
+ {
14138
+ "epoch": 0.8,
14139
+ "grad_norm": 1.7355870008468628,
14140
+ "learning_rate": 1.0047491837340458e-05,
14141
+ "loss": 2.4785,
14142
+ "step": 8076
14143
+ },
14144
+ {
14145
+ "epoch": 0.8,
14146
+ "grad_norm": 1.677388310432434,
14147
+ "learning_rate": 1.0027703571781934e-05,
14148
+ "loss": 2.5572,
14149
+ "step": 8080
14150
+ },
14151
+ {
14152
+ "epoch": 0.8,
14153
+ "grad_norm": 1.7865822315216064,
14154
+ "learning_rate": 1.000791530622341e-05,
14155
+ "loss": 2.771,
14156
+ "step": 8084
14157
+ },
14158
+ {
14159
+ "epoch": 0.8,
14160
+ "grad_norm": 1.8583182096481323,
14161
+ "learning_rate": 9.988127040664887e-06,
14162
+ "loss": 2.4894,
14163
+ "step": 8088
14164
  }
14165
  ],
14166
  "logging_steps": 4,
 
14168
  "num_input_tokens_seen": 0,
14169
  "num_train_epochs": 1,
14170
  "save_steps": 1011,
14171
+ "total_flos": 9.891653776677274e+16,
14172
  "train_batch_size": 4,
14173
  "trial_name": null,
14174
  "trial_params": null