Training in progress, step 80, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 125048
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:819d34c3c14db785e0dd28311c01d0c8f6dcd03cdecfe856b6d00ab306a6259e
|
3 |
size 125048
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 162868
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf9b6d9e891724bb31db21c88f9512591eca8a4a6ad4e8699987a16aa8a16498
|
3 |
size 162868
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77d6b80ff98d1964921d012a443cb87b248a1f2b5da6296fdc6b0c8c5f518f22
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7803647d52fbd7429a283dc695ba7cf653ff890c06d5c50f67d0a09610438889
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 11.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 5,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -617,6 +617,92 @@
|
|
617 |
"eval_samples_per_second": 52.828,
|
618 |
"eval_steps_per_second": 26.417,
|
619 |
"step": 70
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
620 |
}
|
621 |
],
|
622 |
"logging_steps": 1,
|
@@ -645,7 +731,7 @@
|
|
645 |
"attributes": {}
|
646 |
}
|
647 |
},
|
648 |
-
"total_flos":
|
649 |
"train_batch_size": 2,
|
650 |
"trial_name": null,
|
651 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 11.038910865783691,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-80",
|
4 |
+
"epoch": 0.0036157374974576844,
|
5 |
"eval_steps": 5,
|
6 |
+
"global_step": 80,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
617 |
"eval_samples_per_second": 52.828,
|
618 |
"eval_steps_per_second": 26.417,
|
619 |
"step": 70
|
620 |
+
},
|
621 |
+
{
|
622 |
+
"epoch": 0.003208967028993695,
|
623 |
+
"grad_norm": 0.7522275447845459,
|
624 |
+
"learning_rate": 0.00019244917309000817,
|
625 |
+
"loss": 44.2246,
|
626 |
+
"step": 71
|
627 |
+
},
|
628 |
+
{
|
629 |
+
"epoch": 0.0032541637477119163,
|
630 |
+
"grad_norm": 0.7135974168777466,
|
631 |
+
"learning_rate": 0.00019220287022200707,
|
632 |
+
"loss": 44.2111,
|
633 |
+
"step": 72
|
634 |
+
},
|
635 |
+
{
|
636 |
+
"epoch": 0.003299360466430137,
|
637 |
+
"grad_norm": 0.7275662422180176,
|
638 |
+
"learning_rate": 0.0001919527772551451,
|
639 |
+
"loss": 44.1464,
|
640 |
+
"step": 73
|
641 |
+
},
|
642 |
+
{
|
643 |
+
"epoch": 0.003344557185148358,
|
644 |
+
"grad_norm": 0.6742229461669922,
|
645 |
+
"learning_rate": 0.00019169890446976454,
|
646 |
+
"loss": 44.2105,
|
647 |
+
"step": 74
|
648 |
+
},
|
649 |
+
{
|
650 |
+
"epoch": 0.0033897539038665794,
|
651 |
+
"grad_norm": 0.6085646152496338,
|
652 |
+
"learning_rate": 0.00019144126230158127,
|
653 |
+
"loss": 44.0926,
|
654 |
+
"step": 75
|
655 |
+
},
|
656 |
+
{
|
657 |
+
"epoch": 0.0033897539038665794,
|
658 |
+
"eval_loss": 11.042237281799316,
|
659 |
+
"eval_runtime": 176.114,
|
660 |
+
"eval_samples_per_second": 52.903,
|
661 |
+
"eval_steps_per_second": 26.454,
|
662 |
+
"step": 75
|
663 |
+
},
|
664 |
+
{
|
665 |
+
"epoch": 0.0034349506225848005,
|
666 |
+
"grad_norm": 0.7245734333992004,
|
667 |
+
"learning_rate": 0.0001911798613412557,
|
668 |
+
"loss": 44.2154,
|
669 |
+
"step": 76
|
670 |
+
},
|
671 |
+
{
|
672 |
+
"epoch": 0.0034801473413030213,
|
673 |
+
"grad_norm": 0.7311281561851501,
|
674 |
+
"learning_rate": 0.0001909147123339575,
|
675 |
+
"loss": 44.1687,
|
676 |
+
"step": 77
|
677 |
+
},
|
678 |
+
{
|
679 |
+
"epoch": 0.0035253440600212425,
|
680 |
+
"grad_norm": 0.6399495601654053,
|
681 |
+
"learning_rate": 0.0001906458261789238,
|
682 |
+
"loss": 44.1596,
|
683 |
+
"step": 78
|
684 |
+
},
|
685 |
+
{
|
686 |
+
"epoch": 0.0035705407787394636,
|
687 |
+
"grad_norm": 0.5650178790092468,
|
688 |
+
"learning_rate": 0.00019037321392901136,
|
689 |
+
"loss": 44.1466,
|
690 |
+
"step": 79
|
691 |
+
},
|
692 |
+
{
|
693 |
+
"epoch": 0.0036157374974576844,
|
694 |
+
"grad_norm": 0.6039579510688782,
|
695 |
+
"learning_rate": 0.0001900968867902419,
|
696 |
+
"loss": 44.1955,
|
697 |
+
"step": 80
|
698 |
+
},
|
699 |
+
{
|
700 |
+
"epoch": 0.0036157374974576844,
|
701 |
+
"eval_loss": 11.038910865783691,
|
702 |
+
"eval_runtime": 176.3853,
|
703 |
+
"eval_samples_per_second": 52.822,
|
704 |
+
"eval_steps_per_second": 26.414,
|
705 |
+
"step": 80
|
706 |
}
|
707 |
],
|
708 |
"logging_steps": 1,
|
|
|
731 |
"attributes": {}
|
732 |
}
|
733 |
},
|
734 |
+
"total_flos": 840538521600.0,
|
735 |
"train_batch_size": 2,
|
736 |
"trial_name": null,
|
737 |
"trial_params": null
|