Training in progress, step 1050, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 921238736
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:30a19f851e92120b55e4b3e48cb7a19666339f280f7846f0368ed6553229846c
|
3 |
size 921238736
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1808993594
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f86f420488c8931f87fe380cac8026e362a39fcc956c3cbceb4f6eaa1202966a
|
3 |
size 1808993594
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14180
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85a5717b56a12e8c1a74fbcc04f95d33d38ad282e6759eb62471db1fd54435cd
|
3 |
size 14180
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:faf19dc22a4fe1ad09e77bb25c83ade70823bae5378e9f8bd12663aae71b06a4
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.13146419823169708,
|
3 |
"best_model_checkpoint": "./output/checkpoint-150",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 150,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -685,6 +685,119 @@
|
|
685 |
"eval_samples_per_second": 11.569,
|
686 |
"eval_steps_per_second": 11.569,
|
687 |
"step": 900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
688 |
}
|
689 |
],
|
690 |
"logging_steps": 10,
|
@@ -704,7 +817,7 @@
|
|
704 |
"attributes": {}
|
705 |
}
|
706 |
},
|
707 |
-
"total_flos":
|
708 |
"train_batch_size": 8,
|
709 |
"trial_name": null,
|
710 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.13146419823169708,
|
3 |
"best_model_checkpoint": "./output/checkpoint-150",
|
4 |
+
"epoch": 5.172413793103448,
|
5 |
"eval_steps": 150,
|
6 |
+
"global_step": 1050,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
685 |
"eval_samples_per_second": 11.569,
|
686 |
"eval_steps_per_second": 11.569,
|
687 |
"step": 900
|
688 |
+
},
|
689 |
+
{
|
690 |
+
"epoch": 4.482758620689655,
|
691 |
+
"grad_norm": 1.3206291198730469,
|
692 |
+
"learning_rate": 6.088655409611797e-05,
|
693 |
+
"loss": 0.0195,
|
694 |
+
"step": 910
|
695 |
+
},
|
696 |
+
{
|
697 |
+
"epoch": 4.532019704433497,
|
698 |
+
"grad_norm": 0.7733320593833923,
|
699 |
+
"learning_rate": 6.07822678974027e-05,
|
700 |
+
"loss": 0.0225,
|
701 |
+
"step": 920
|
702 |
+
},
|
703 |
+
{
|
704 |
+
"epoch": 4.58128078817734,
|
705 |
+
"grad_norm": 0.5630067586898804,
|
706 |
+
"learning_rate": 6.067682289967549e-05,
|
707 |
+
"loss": 0.0209,
|
708 |
+
"step": 930
|
709 |
+
},
|
710 |
+
{
|
711 |
+
"epoch": 4.630541871921182,
|
712 |
+
"grad_norm": 0.7231793403625488,
|
713 |
+
"learning_rate": 6.05702234373672e-05,
|
714 |
+
"loss": 0.0257,
|
715 |
+
"step": 940
|
716 |
+
},
|
717 |
+
{
|
718 |
+
"epoch": 4.679802955665025,
|
719 |
+
"grad_norm": 1.0794564485549927,
|
720 |
+
"learning_rate": 6.04624738923642e-05,
|
721 |
+
"loss": 0.0247,
|
722 |
+
"step": 950
|
723 |
+
},
|
724 |
+
{
|
725 |
+
"epoch": 4.7290640394088665,
|
726 |
+
"grad_norm": 1.263869047164917,
|
727 |
+
"learning_rate": 6.0353578693828246e-05,
|
728 |
+
"loss": 0.0222,
|
729 |
+
"step": 960
|
730 |
+
},
|
731 |
+
{
|
732 |
+
"epoch": 4.778325123152709,
|
733 |
+
"grad_norm": 1.1668565273284912,
|
734 |
+
"learning_rate": 6.0243542318014456e-05,
|
735 |
+
"loss": 0.0291,
|
736 |
+
"step": 970
|
737 |
+
},
|
738 |
+
{
|
739 |
+
"epoch": 4.827586206896552,
|
740 |
+
"grad_norm": 1.756858229637146,
|
741 |
+
"learning_rate": 6.013236928808725e-05,
|
742 |
+
"loss": 0.0263,
|
743 |
+
"step": 980
|
744 |
+
},
|
745 |
+
{
|
746 |
+
"epoch": 4.876847290640394,
|
747 |
+
"grad_norm": 1.017069935798645,
|
748 |
+
"learning_rate": 6.002006417393445e-05,
|
749 |
+
"loss": 0.0241,
|
750 |
+
"step": 990
|
751 |
+
},
|
752 |
+
{
|
753 |
+
"epoch": 4.926108374384237,
|
754 |
+
"grad_norm": 1.153463363647461,
|
755 |
+
"learning_rate": 5.9906631591979426e-05,
|
756 |
+
"loss": 0.0255,
|
757 |
+
"step": 1000
|
758 |
+
},
|
759 |
+
{
|
760 |
+
"epoch": 4.975369458128079,
|
761 |
+
"grad_norm": 0.9330563545227051,
|
762 |
+
"learning_rate": 5.979207620499136e-05,
|
763 |
+
"loss": 0.0289,
|
764 |
+
"step": 1010
|
765 |
+
},
|
766 |
+
{
|
767 |
+
"epoch": 5.024630541871921,
|
768 |
+
"grad_norm": 0.6231732368469238,
|
769 |
+
"learning_rate": 5.96764027218935e-05,
|
770 |
+
"loss": 0.0204,
|
771 |
+
"step": 1020
|
772 |
+
},
|
773 |
+
{
|
774 |
+
"epoch": 5.073891625615763,
|
775 |
+
"grad_norm": 0.6820633411407471,
|
776 |
+
"learning_rate": 5.95596158975697e-05,
|
777 |
+
"loss": 0.0117,
|
778 |
+
"step": 1030
|
779 |
+
},
|
780 |
+
{
|
781 |
+
"epoch": 5.123152709359606,
|
782 |
+
"grad_norm": 0.6919358968734741,
|
783 |
+
"learning_rate": 5.944172053266886e-05,
|
784 |
+
"loss": 0.0142,
|
785 |
+
"step": 1040
|
786 |
+
},
|
787 |
+
{
|
788 |
+
"epoch": 5.172413793103448,
|
789 |
+
"grad_norm": 2.5571327209472656,
|
790 |
+
"learning_rate": 5.932272147340768e-05,
|
791 |
+
"loss": 0.0187,
|
792 |
+
"step": 1050
|
793 |
+
},
|
794 |
+
{
|
795 |
+
"epoch": 5.172413793103448,
|
796 |
+
"eval_loss": 0.20086674392223358,
|
797 |
+
"eval_runtime": 15.4256,
|
798 |
+
"eval_samples_per_second": 11.734,
|
799 |
+
"eval_steps_per_second": 11.734,
|
800 |
+
"step": 1050
|
801 |
}
|
802 |
],
|
803 |
"logging_steps": 10,
|
|
|
817 |
"attributes": {}
|
818 |
}
|
819 |
},
|
820 |
+
"total_flos": 6.349714559045222e+16,
|
821 |
"train_batch_size": 8,
|
822 |
"trial_name": null,
|
823 |
"trial_params": null
|