Saumya-Mundra committed on
Commit
31cf2fb
·
verified ·
1 Parent(s): 9b85988

End of training

Browse files
Files changed (5) hide show
  1. README.md +3 -1
  2. all_results.json +15 -15
  3. eval_results.json +10 -10
  4. train_results.json +6 -6
  5. trainer_state.json +582 -12
README.md CHANGED
@@ -3,6 +3,8 @@ library_name: transformers
3
  license: other
4
  base_model: nvidia/mit-b1
5
  tags:
 
 
6
  - generated_from_trainer
7
  model-index:
8
  - name: segformer-finetuned-tt-2k-b1
@@ -14,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # segformer-finetuned-tt-2k-b1
16
 
17
- This model is a fine-tuned version of [nvidia/mit-b1](https://huggingface.co/nvidia/mit-b1) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
  - Loss: 0.0912
20
  - Mean Iou: 0.4902
 
3
  license: other
4
  base_model: nvidia/mit-b1
5
  tags:
6
+ - image-segmentation
7
+ - vision
8
  - generated_from_trainer
9
  model-index:
10
  - name: segformer-finetuned-tt-2k-b1
 
16
 
17
  # segformer-finetuned-tt-2k-b1
18
 
19
+ This model is a fine-tuned version of [nvidia/mit-b1](https://huggingface.co/nvidia/mit-b1) on the Saumya-Mundra/text255 dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.0912
22
  - Mean Iou: 0.4902
all_results.json CHANGED
@@ -1,19 +1,19 @@
1
  {
2
- "epoch": 16.0,
3
- "eval_accuracy_no_text": 0.9793073981417643,
4
  "eval_accuracy_text": NaN,
5
- "eval_iou_no_text": 0.9793073981417643,
6
  "eval_iou_text": 0.0,
7
- "eval_loss": 0.09290074557065964,
8
- "eval_mean_accuracy": 0.9793073981417643,
9
- "eval_mean_iou": 0.48965369907088213,
10
- "eval_overall_accuracy": 0.9793073981417643,
11
- "eval_runtime": 10.192,
12
- "eval_samples_per_second": 24.922,
13
- "eval_steps_per_second": 3.14,
14
- "total_flos": 1.032636453617664e+18,
15
- "train_loss": 0.0,
16
- "train_runtime": 0.0014,
17
- "train_samples_per_second": 11544617.925,
18
- "train_steps_per_second": 1443077.241
19
  }
 
1
  {
2
+ "epoch": 40.0,
3
+ "eval_accuracy_no_text": 0.9804553846542395,
4
  "eval_accuracy_text": NaN,
5
+ "eval_iou_no_text": 0.9804553846542395,
6
  "eval_iou_text": 0.0,
7
+ "eval_loss": 0.0911744013428688,
8
+ "eval_mean_accuracy": 0.9804553846542395,
9
+ "eval_mean_iou": 0.49022769232711977,
10
+ "eval_overall_accuracy": 0.9804553846542395,
11
+ "eval_runtime": 12.1143,
12
+ "eval_samples_per_second": 20.967,
13
+ "eval_steps_per_second": 2.642,
14
+ "total_flos": 2.58159113404416e+18,
15
+ "train_loss": 0.043670280265808104,
16
+ "train_runtime": 1256.8878,
17
+ "train_samples_per_second": 31.825,
18
+ "train_steps_per_second": 3.978
19
  }
eval_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 16.0,
3
- "eval_accuracy_no_text": 0.9793073981417643,
4
  "eval_accuracy_text": NaN,
5
- "eval_iou_no_text": 0.9793073981417643,
6
  "eval_iou_text": 0.0,
7
- "eval_loss": 0.09290074557065964,
8
- "eval_mean_accuracy": 0.9793073981417643,
9
- "eval_mean_iou": 0.48965369907088213,
10
- "eval_overall_accuracy": 0.9793073981417643,
11
- "eval_runtime": 10.192,
12
- "eval_samples_per_second": 24.922,
13
- "eval_steps_per_second": 3.14
14
  }
 
1
  {
2
+ "epoch": 40.0,
3
+ "eval_accuracy_no_text": 0.9804553846542395,
4
  "eval_accuracy_text": NaN,
5
+ "eval_iou_no_text": 0.9804553846542395,
6
  "eval_iou_text": 0.0,
7
+ "eval_loss": 0.0911744013428688,
8
+ "eval_mean_accuracy": 0.9804553846542395,
9
+ "eval_mean_iou": 0.49022769232711977,
10
+ "eval_overall_accuracy": 0.9804553846542395,
11
+ "eval_runtime": 12.1143,
12
+ "eval_samples_per_second": 20.967,
13
+ "eval_steps_per_second": 2.642
14
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 16.0,
3
- "total_flos": 1.032636453617664e+18,
4
- "train_loss": 0.0,
5
- "train_runtime": 0.0014,
6
- "train_samples_per_second": 11544617.925,
7
- "train_steps_per_second": 1443077.241
8
  }
 
1
  {
2
+ "epoch": 40.0,
3
+ "total_flos": 2.58159113404416e+18,
4
+ "train_loss": 0.043670280265808104,
5
+ "train_runtime": 1256.8878,
6
+ "train_samples_per_second": 31.825,
7
+ "train_steps_per_second": 3.978
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 16.0,
5
  "eval_steps": 500,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -389,19 +389,589 @@
389
  "step": 2000
390
  },
391
  {
392
- "epoch": 16.0,
393
- "step": 2000,
394
- "total_flos": 1.032636453617664e+18,
395
- "train_loss": 0.0,
396
- "train_runtime": 0.0014,
397
- "train_samples_per_second": 11544617.925,
398
- "train_steps_per_second": 1443077.241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  }
400
  ],
401
  "logging_steps": 100,
402
- "max_steps": 2000,
403
  "num_input_tokens_seen": 0,
404
- "num_train_epochs": 16,
405
  "save_steps": 500,
406
  "stateful_callbacks": {
407
  "TrainerControl": {
@@ -415,7 +985,7 @@
415
  "attributes": {}
416
  }
417
  },
418
- "total_flos": 1.032636453617664e+18,
419
  "train_batch_size": 8,
420
  "trial_name": null,
421
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 40.0,
5
  "eval_steps": 500,
6
+ "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
389
  "step": 2000
390
  },
391
  {
392
+ "epoch": 16.8,
393
+ "grad_norm": 1.0451207160949707,
394
+ "learning_rate": 3.9e-05,
395
+ "loss": 0.0884,
396
+ "step": 2100
397
+ },
398
+ {
399
+ "epoch": 17.0,
400
+ "eval_accuracy_no_text": 0.9750931240524451,
401
+ "eval_accuracy_text": NaN,
402
+ "eval_iou_no_text": 0.9750931240524451,
403
+ "eval_iou_text": 0.0,
404
+ "eval_loss": 0.10009197145700455,
405
+ "eval_mean_accuracy": 0.9750931240524451,
406
+ "eval_mean_iou": 0.48754656202622254,
407
+ "eval_overall_accuracy": 0.9750931240524451,
408
+ "eval_runtime": 9.5953,
409
+ "eval_samples_per_second": 26.471,
410
+ "eval_steps_per_second": 3.335,
411
+ "step": 2125
412
+ },
413
+ {
414
+ "epoch": 17.6,
415
+ "grad_norm": 0.429823637008667,
416
+ "learning_rate": 3.800000000000001e-05,
417
+ "loss": 0.0871,
418
+ "step": 2200
419
+ },
420
+ {
421
+ "epoch": 18.0,
422
+ "eval_accuracy_no_text": 0.9783042661894453,
423
+ "eval_accuracy_text": NaN,
424
+ "eval_iou_no_text": 0.9783042661894453,
425
+ "eval_iou_text": 0.0,
426
+ "eval_loss": 0.0907384604215622,
427
+ "eval_mean_accuracy": 0.9783042661894453,
428
+ "eval_mean_iou": 0.48915213309472266,
429
+ "eval_overall_accuracy": 0.9783042661894453,
430
+ "eval_runtime": 9.8871,
431
+ "eval_samples_per_second": 25.69,
432
+ "eval_steps_per_second": 3.237,
433
+ "step": 2250
434
+ },
435
+ {
436
+ "epoch": 18.4,
437
+ "grad_norm": 0.3153773844242096,
438
+ "learning_rate": 3.7000000000000005e-05,
439
+ "loss": 0.0854,
440
+ "step": 2300
441
+ },
442
+ {
443
+ "epoch": 19.0,
444
+ "eval_accuracy_no_text": 0.9848925234161184,
445
+ "eval_accuracy_text": NaN,
446
+ "eval_iou_no_text": 0.9848925234161184,
447
+ "eval_iou_text": 0.0,
448
+ "eval_loss": 0.08931159973144531,
449
+ "eval_mean_accuracy": 0.9848925234161184,
450
+ "eval_mean_iou": 0.4924462617080592,
451
+ "eval_overall_accuracy": 0.9848925234161184,
452
+ "eval_runtime": 10.5574,
453
+ "eval_samples_per_second": 24.059,
454
+ "eval_steps_per_second": 3.031,
455
+ "step": 2375
456
+ },
457
+ {
458
+ "epoch": 19.2,
459
+ "grad_norm": 0.4562213718891144,
460
+ "learning_rate": 3.6e-05,
461
+ "loss": 0.0865,
462
+ "step": 2400
463
+ },
464
+ {
465
+ "epoch": 20.0,
466
+ "grad_norm": 3.521730422973633,
467
+ "learning_rate": 3.5000000000000004e-05,
468
+ "loss": 0.0852,
469
+ "step": 2500
470
+ },
471
+ {
472
+ "epoch": 20.0,
473
+ "eval_accuracy_no_text": 0.9830510361429408,
474
+ "eval_accuracy_text": NaN,
475
+ "eval_iou_no_text": 0.9830510361429408,
476
+ "eval_iou_text": 0.0,
477
+ "eval_loss": 0.0869787335395813,
478
+ "eval_mean_accuracy": 0.9830510361429408,
479
+ "eval_mean_iou": 0.4915255180714704,
480
+ "eval_overall_accuracy": 0.9830510361429408,
481
+ "eval_runtime": 9.6683,
482
+ "eval_samples_per_second": 26.271,
483
+ "eval_steps_per_second": 3.31,
484
+ "step": 2500
485
+ },
486
+ {
487
+ "epoch": 20.8,
488
+ "grad_norm": 0.6932634115219116,
489
+ "learning_rate": 3.4e-05,
490
+ "loss": 0.0858,
491
+ "step": 2600
492
+ },
493
+ {
494
+ "epoch": 21.0,
495
+ "eval_accuracy_no_text": 0.9791810533292581,
496
+ "eval_accuracy_text": NaN,
497
+ "eval_iou_no_text": 0.9791810533292581,
498
+ "eval_iou_text": 0.0,
499
+ "eval_loss": 0.092495396733284,
500
+ "eval_mean_accuracy": 0.9791810533292581,
501
+ "eval_mean_iou": 0.48959052666462904,
502
+ "eval_overall_accuracy": 0.9791810533292581,
503
+ "eval_runtime": 9.6807,
504
+ "eval_samples_per_second": 26.238,
505
+ "eval_steps_per_second": 3.306,
506
+ "step": 2625
507
+ },
508
+ {
509
+ "epoch": 21.6,
510
+ "grad_norm": 1.7822684049606323,
511
+ "learning_rate": 3.2999999999999996e-05,
512
+ "loss": 0.0804,
513
+ "step": 2700
514
+ },
515
+ {
516
+ "epoch": 22.0,
517
+ "eval_accuracy_no_text": 0.9774249867137729,
518
+ "eval_accuracy_text": NaN,
519
+ "eval_iou_no_text": 0.9774249867137729,
520
+ "eval_iou_text": 0.0,
521
+ "eval_loss": 0.09636090695858002,
522
+ "eval_mean_accuracy": 0.9774249867137729,
523
+ "eval_mean_iou": 0.48871249335688643,
524
+ "eval_overall_accuracy": 0.9774249867137729,
525
+ "eval_runtime": 9.6707,
526
+ "eval_samples_per_second": 26.265,
527
+ "eval_steps_per_second": 3.309,
528
+ "step": 2750
529
+ },
530
+ {
531
+ "epoch": 22.4,
532
+ "grad_norm": 0.7184740304946899,
533
+ "learning_rate": 3.2e-05,
534
+ "loss": 0.076,
535
+ "step": 2800
536
+ },
537
+ {
538
+ "epoch": 23.0,
539
+ "eval_accuracy_no_text": 0.9786283861601919,
540
+ "eval_accuracy_text": NaN,
541
+ "eval_iou_no_text": 0.9786283861601919,
542
+ "eval_iou_text": 0.0,
543
+ "eval_loss": 0.09344039112329483,
544
+ "eval_mean_accuracy": 0.9786283861601919,
545
+ "eval_mean_iou": 0.48931419308009594,
546
+ "eval_overall_accuracy": 0.9786283861601919,
547
+ "eval_runtime": 10.2081,
548
+ "eval_samples_per_second": 24.882,
549
+ "eval_steps_per_second": 3.135,
550
+ "step": 2875
551
+ },
552
+ {
553
+ "epoch": 23.2,
554
+ "grad_norm": 0.669192373752594,
555
+ "learning_rate": 3.1e-05,
556
+ "loss": 0.0794,
557
+ "step": 2900
558
+ },
559
+ {
560
+ "epoch": 24.0,
561
+ "grad_norm": 0.8025239706039429,
562
+ "learning_rate": 3e-05,
563
+ "loss": 0.0753,
564
+ "step": 3000
565
+ },
566
+ {
567
+ "epoch": 24.0,
568
+ "eval_accuracy_no_text": 0.9780561292312697,
569
+ "eval_accuracy_text": NaN,
570
+ "eval_iou_no_text": 0.9780561292312697,
571
+ "eval_iou_text": 0.0,
572
+ "eval_loss": 0.0905555859208107,
573
+ "eval_mean_accuracy": 0.9780561292312697,
574
+ "eval_mean_iou": 0.48902806461563486,
575
+ "eval_overall_accuracy": 0.9780561292312697,
576
+ "eval_runtime": 9.666,
577
+ "eval_samples_per_second": 26.278,
578
+ "eval_steps_per_second": 3.311,
579
+ "step": 3000
580
+ },
581
+ {
582
+ "epoch": 24.8,
583
+ "grad_norm": 2.0820531845092773,
584
+ "learning_rate": 2.9e-05,
585
+ "loss": 0.0742,
586
+ "step": 3100
587
+ },
588
+ {
589
+ "epoch": 25.0,
590
+ "eval_accuracy_no_text": 0.9800921017793534,
591
+ "eval_accuracy_text": NaN,
592
+ "eval_iou_no_text": 0.9800921017793534,
593
+ "eval_iou_text": 0.0,
594
+ "eval_loss": 0.09615642577409744,
595
+ "eval_mean_accuracy": 0.9800921017793534,
596
+ "eval_mean_iou": 0.4900460508896767,
597
+ "eval_overall_accuracy": 0.9800921017793534,
598
+ "eval_runtime": 10.619,
599
+ "eval_samples_per_second": 23.919,
600
+ "eval_steps_per_second": 3.013,
601
+ "step": 3125
602
+ },
603
+ {
604
+ "epoch": 25.6,
605
+ "grad_norm": 0.9775315523147583,
606
+ "learning_rate": 2.7999999999999996e-05,
607
+ "loss": 0.0724,
608
+ "step": 3200
609
+ },
610
+ {
611
+ "epoch": 26.0,
612
+ "eval_accuracy_no_text": 0.9840405599414667,
613
+ "eval_accuracy_text": NaN,
614
+ "eval_iou_no_text": 0.9840405599414667,
615
+ "eval_iou_text": 0.0,
616
+ "eval_loss": 0.08916542679071426,
617
+ "eval_mean_accuracy": 0.9840405599414667,
618
+ "eval_mean_iou": 0.49202027997073333,
619
+ "eval_overall_accuracy": 0.9840405599414667,
620
+ "eval_runtime": 10.4734,
621
+ "eval_samples_per_second": 24.252,
622
+ "eval_steps_per_second": 3.055,
623
+ "step": 3250
624
+ },
625
+ {
626
+ "epoch": 26.4,
627
+ "grad_norm": 0.5055297017097473,
628
+ "learning_rate": 2.7e-05,
629
+ "loss": 0.0794,
630
+ "step": 3300
631
+ },
632
+ {
633
+ "epoch": 27.0,
634
+ "eval_accuracy_no_text": 0.9803492277621956,
635
+ "eval_accuracy_text": NaN,
636
+ "eval_iou_no_text": 0.9803492277621956,
637
+ "eval_iou_text": 0.0,
638
+ "eval_loss": 0.08851944655179977,
639
+ "eval_mean_accuracy": 0.9803492277621956,
640
+ "eval_mean_iou": 0.4901746138810978,
641
+ "eval_overall_accuracy": 0.9803492277621956,
642
+ "eval_runtime": 10.0091,
643
+ "eval_samples_per_second": 25.377,
644
+ "eval_steps_per_second": 3.197,
645
+ "step": 3375
646
+ },
647
+ {
648
+ "epoch": 27.2,
649
+ "grad_norm": 0.45488983392715454,
650
+ "learning_rate": 2.5999999999999995e-05,
651
+ "loss": 0.0719,
652
+ "step": 3400
653
+ },
654
+ {
655
+ "epoch": 28.0,
656
+ "grad_norm": 2.387666702270508,
657
+ "learning_rate": 2.5000000000000005e-05,
658
+ "loss": 0.0685,
659
+ "step": 3500
660
+ },
661
+ {
662
+ "epoch": 28.0,
663
+ "eval_accuracy_no_text": 0.9821194342297905,
664
+ "eval_accuracy_text": NaN,
665
+ "eval_iou_no_text": 0.9821194342297905,
666
+ "eval_iou_text": 0.0,
667
+ "eval_loss": 0.09322857111692429,
668
+ "eval_mean_accuracy": 0.9821194342297905,
669
+ "eval_mean_iou": 0.49105971711489527,
670
+ "eval_overall_accuracy": 0.9821194342297905,
671
+ "eval_runtime": 9.8063,
672
+ "eval_samples_per_second": 25.902,
673
+ "eval_steps_per_second": 3.263,
674
+ "step": 3500
675
+ },
676
+ {
677
+ "epoch": 28.8,
678
+ "grad_norm": 0.6571559906005859,
679
+ "learning_rate": 2.4000000000000004e-05,
680
+ "loss": 0.0695,
681
+ "step": 3600
682
+ },
683
+ {
684
+ "epoch": 29.0,
685
+ "eval_accuracy_no_text": 0.9811816347081362,
686
+ "eval_accuracy_text": NaN,
687
+ "eval_iou_no_text": 0.9811816347081362,
688
+ "eval_iou_text": 0.0,
689
+ "eval_loss": 0.08898366987705231,
690
+ "eval_mean_accuracy": 0.9811816347081362,
691
+ "eval_mean_iou": 0.4905908173540681,
692
+ "eval_overall_accuracy": 0.9811816347081362,
693
+ "eval_runtime": 9.8466,
694
+ "eval_samples_per_second": 25.796,
695
+ "eval_steps_per_second": 3.25,
696
+ "step": 3625
697
+ },
698
+ {
699
+ "epoch": 29.6,
700
+ "grad_norm": 2.2663826942443848,
701
+ "learning_rate": 2.3000000000000003e-05,
702
+ "loss": 0.065,
703
+ "step": 3700
704
+ },
705
+ {
706
+ "epoch": 30.0,
707
+ "eval_accuracy_no_text": 0.9808195315388902,
708
+ "eval_accuracy_text": NaN,
709
+ "eval_iou_no_text": 0.9808195315388902,
710
+ "eval_iou_text": 0.0,
711
+ "eval_loss": 0.08774251490831375,
712
+ "eval_mean_accuracy": 0.9808195315388902,
713
+ "eval_mean_iou": 0.4904097657694451,
714
+ "eval_overall_accuracy": 0.9808195315388902,
715
+ "eval_runtime": 10.278,
716
+ "eval_samples_per_second": 24.713,
717
+ "eval_steps_per_second": 3.113,
718
+ "step": 3750
719
+ },
720
+ {
721
+ "epoch": 30.4,
722
+ "grad_norm": 0.26865702867507935,
723
+ "learning_rate": 2.2e-05,
724
+ "loss": 0.0699,
725
+ "step": 3800
726
+ },
727
+ {
728
+ "epoch": 31.0,
729
+ "eval_accuracy_no_text": 0.975387618457608,
730
+ "eval_accuracy_text": NaN,
731
+ "eval_iou_no_text": 0.975387618457608,
732
+ "eval_iou_text": 0.0,
733
+ "eval_loss": 0.09474235773086548,
734
+ "eval_mean_accuracy": 0.975387618457608,
735
+ "eval_mean_iou": 0.487693809228804,
736
+ "eval_overall_accuracy": 0.975387618457608,
737
+ "eval_runtime": 9.5955,
738
+ "eval_samples_per_second": 26.471,
739
+ "eval_steps_per_second": 3.335,
740
+ "step": 3875
741
+ },
742
+ {
743
+ "epoch": 31.2,
744
+ "grad_norm": 0.4936189353466034,
745
+ "learning_rate": 2.1e-05,
746
+ "loss": 0.0691,
747
+ "step": 3900
748
+ },
749
+ {
750
+ "epoch": 32.0,
751
+ "grad_norm": 0.5088217854499817,
752
+ "learning_rate": 1.9999999999999998e-05,
753
+ "loss": 0.0742,
754
+ "step": 4000
755
+ },
756
+ {
757
+ "epoch": 32.0,
758
+ "eval_accuracy_no_text": 0.9804912244438997,
759
+ "eval_accuracy_text": NaN,
760
+ "eval_iou_no_text": 0.9804912244438997,
761
+ "eval_iou_text": 0.0,
762
+ "eval_loss": 0.08749811351299286,
763
+ "eval_mean_accuracy": 0.9804912244438997,
764
+ "eval_mean_iou": 0.49024561222194984,
765
+ "eval_overall_accuracy": 0.9804912244438997,
766
+ "eval_runtime": 9.7343,
767
+ "eval_samples_per_second": 26.093,
768
+ "eval_steps_per_second": 3.287,
769
+ "step": 4000
770
+ },
771
+ {
772
+ "epoch": 32.8,
773
+ "grad_norm": 0.5367891192436218,
774
+ "learning_rate": 1.9000000000000008e-05,
775
+ "loss": 0.0646,
776
+ "step": 4100
777
+ },
778
+ {
779
+ "epoch": 33.0,
780
+ "eval_accuracy_no_text": 0.9805454078254859,
781
+ "eval_accuracy_text": NaN,
782
+ "eval_iou_no_text": 0.9805454078254859,
783
+ "eval_iou_text": 0.0,
784
+ "eval_loss": 0.0895417109131813,
785
+ "eval_mean_accuracy": 0.9805454078254859,
786
+ "eval_mean_iou": 0.49027270391274297,
787
+ "eval_overall_accuracy": 0.9805454078254859,
788
+ "eval_runtime": 10.3388,
789
+ "eval_samples_per_second": 24.568,
790
+ "eval_steps_per_second": 3.095,
791
+ "step": 4125
792
+ },
793
+ {
794
+ "epoch": 33.6,
795
+ "grad_norm": 1.4165046215057373,
796
+ "learning_rate": 1.8000000000000004e-05,
797
+ "loss": 0.0677,
798
+ "step": 4200
799
+ },
800
+ {
801
+ "epoch": 34.0,
802
+ "eval_accuracy_no_text": 0.9818302401922222,
803
+ "eval_accuracy_text": NaN,
804
+ "eval_iou_no_text": 0.9818302401922222,
805
+ "eval_iou_text": 0.0,
806
+ "eval_loss": 0.0914706438779831,
807
+ "eval_mean_accuracy": 0.9818302401922222,
808
+ "eval_mean_iou": 0.4909151200961111,
809
+ "eval_overall_accuracy": 0.9818302401922222,
810
+ "eval_runtime": 9.553,
811
+ "eval_samples_per_second": 26.589,
812
+ "eval_steps_per_second": 3.35,
813
+ "step": 4250
814
+ },
815
+ {
816
+ "epoch": 34.4,
817
+ "grad_norm": 0.45522621273994446,
818
+ "learning_rate": 1.7000000000000003e-05,
819
+ "loss": 0.0666,
820
+ "step": 4300
821
+ },
822
+ {
823
+ "epoch": 35.0,
824
+ "eval_accuracy_no_text": 0.9780782611737024,
825
+ "eval_accuracy_text": NaN,
826
+ "eval_iou_no_text": 0.9780782611737024,
827
+ "eval_iou_text": 0.0,
828
+ "eval_loss": 0.0931800901889801,
829
+ "eval_mean_accuracy": 0.9780782611737024,
830
+ "eval_mean_iou": 0.4890391305868512,
831
+ "eval_overall_accuracy": 0.9780782611737024,
832
+ "eval_runtime": 10.7796,
833
+ "eval_samples_per_second": 23.563,
834
+ "eval_steps_per_second": 2.969,
835
+ "step": 4375
836
+ },
837
+ {
838
+ "epoch": 35.2,
839
+ "grad_norm": 1.4983059167861938,
840
+ "learning_rate": 1.6e-05,
841
+ "loss": 0.065,
842
+ "step": 4400
843
+ },
844
+ {
845
+ "epoch": 36.0,
846
+ "grad_norm": 0.9115901589393616,
847
+ "learning_rate": 1.5e-05,
848
+ "loss": 0.062,
849
+ "step": 4500
850
+ },
851
+ {
852
+ "epoch": 36.0,
853
+ "eval_accuracy_no_text": 0.9802527743644315,
854
+ "eval_accuracy_text": NaN,
855
+ "eval_iou_no_text": 0.9802527743644315,
856
+ "eval_iou_text": 0.0,
857
+ "eval_loss": 0.08927793055772781,
858
+ "eval_mean_accuracy": 0.9802527743644315,
859
+ "eval_mean_iou": 0.49012638718221574,
860
+ "eval_overall_accuracy": 0.9802527743644315,
861
+ "eval_runtime": 9.8416,
862
+ "eval_samples_per_second": 25.809,
863
+ "eval_steps_per_second": 3.251,
864
+ "step": 4500
865
+ },
866
+ {
867
+ "epoch": 36.8,
868
+ "grad_norm": 0.7726097106933594,
869
+ "learning_rate": 1.3999999999999998e-05,
870
+ "loss": 0.0623,
871
+ "step": 4600
872
+ },
873
+ {
874
+ "epoch": 37.0,
875
+ "eval_accuracy_no_text": 0.9789289452492796,
876
+ "eval_accuracy_text": NaN,
877
+ "eval_iou_no_text": 0.9789289452492796,
878
+ "eval_iou_text": 0.0,
879
+ "eval_loss": 0.09337003529071808,
880
+ "eval_mean_accuracy": 0.9789289452492796,
881
+ "eval_mean_iou": 0.4894644726246398,
882
+ "eval_overall_accuracy": 0.9789289452492796,
883
+ "eval_runtime": 10.0951,
884
+ "eval_samples_per_second": 25.161,
885
+ "eval_steps_per_second": 3.17,
886
+ "step": 4625
887
+ },
888
+ {
889
+ "epoch": 37.6,
890
+ "grad_norm": 0.4795573651790619,
891
+ "learning_rate": 1.3000000000000004e-05,
892
+ "loss": 0.0658,
893
+ "step": 4700
894
+ },
895
+ {
896
+ "epoch": 38.0,
897
+ "eval_accuracy_no_text": 0.9825526686644677,
898
+ "eval_accuracy_text": NaN,
899
+ "eval_iou_no_text": 0.9825526686644677,
900
+ "eval_iou_text": 0.0,
901
+ "eval_loss": 0.09065607190132141,
902
+ "eval_mean_accuracy": 0.9825526686644677,
903
+ "eval_mean_iou": 0.49127633433223383,
904
+ "eval_overall_accuracy": 0.9825526686644677,
905
+ "eval_runtime": 10.2603,
906
+ "eval_samples_per_second": 24.756,
907
+ "eval_steps_per_second": 3.119,
908
+ "step": 4750
909
+ },
910
+ {
911
+ "epoch": 38.4,
912
+ "grad_norm": 0.9340759515762329,
913
+ "learning_rate": 1.2000000000000002e-05,
914
+ "loss": 0.0596,
915
+ "step": 4800
916
+ },
917
+ {
918
+ "epoch": 39.0,
919
+ "eval_accuracy_no_text": 0.9830672030948829,
920
+ "eval_accuracy_text": NaN,
921
+ "eval_iou_no_text": 0.9830672030948829,
922
+ "eval_iou_text": 0.0,
923
+ "eval_loss": 0.09037832170724869,
924
+ "eval_mean_accuracy": 0.9830672030948829,
925
+ "eval_mean_iou": 0.49153360154744147,
926
+ "eval_overall_accuracy": 0.9830672030948829,
927
+ "eval_runtime": 10.5023,
928
+ "eval_samples_per_second": 24.185,
929
+ "eval_steps_per_second": 3.047,
930
+ "step": 4875
931
+ },
932
+ {
933
+ "epoch": 39.2,
934
+ "grad_norm": 0.2577395737171173,
935
+ "learning_rate": 1.1000000000000001e-05,
936
+ "loss": 0.0637,
937
+ "step": 4900
938
+ },
939
+ {
940
+ "epoch": 40.0,
941
+ "grad_norm": 0.7885801792144775,
942
+ "learning_rate": 9.999999999999999e-06,
943
+ "loss": 0.0628,
944
+ "step": 5000
945
+ },
946
+ {
947
+ "epoch": 40.0,
948
+ "eval_accuracy_no_text": 0.9804553846542395,
949
+ "eval_accuracy_text": NaN,
950
+ "eval_iou_no_text": 0.9804553846542395,
951
+ "eval_iou_text": 0.0,
952
+ "eval_loss": 0.0911744087934494,
953
+ "eval_mean_accuracy": 0.9804553846542395,
954
+ "eval_mean_iou": 0.49022769232711977,
955
+ "eval_overall_accuracy": 0.9804553846542395,
956
+ "eval_runtime": 9.7243,
957
+ "eval_samples_per_second": 26.12,
958
+ "eval_steps_per_second": 3.291,
959
+ "step": 5000
960
+ },
961
+ {
962
+ "epoch": 40.0,
963
+ "step": 5000,
964
+ "total_flos": 2.58159113404416e+18,
965
+ "train_loss": 0.043670280265808104,
966
+ "train_runtime": 1256.8878,
967
+ "train_samples_per_second": 31.825,
968
+ "train_steps_per_second": 3.978
969
  }
970
  ],
971
  "logging_steps": 100,
972
+ "max_steps": 5000,
973
  "num_input_tokens_seen": 0,
974
+ "num_train_epochs": 40,
975
  "save_steps": 500,
976
  "stateful_callbacks": {
977
  "TrainerControl": {
 
985
  "attributes": {}
986
  }
987
  },
988
+ "total_flos": 2.58159113404416e+18,
989
  "train_batch_size": 8,
990
  "trial_name": null,
991
  "trial_params": null