Training in progress, step 3750, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 69527352
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3974b1bb4f346d15b63d67a72929b7007ca3f86b53199a1fd7e5e099e9c14afa
|
3 |
size 69527352
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 139313554
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:474b61e6b7c602c238188085fc11f74c10ee7fc4e1df53ec4af036170a9079c5
|
3 |
size 139313554
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14308
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba2a26069fc52ae057d8721b042adddf9f086526ef980fe3ff35c4c5a9d329e8
|
3 |
size 14308
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:607202c5623a0d7c32f1c23cdb843f56835e006025f9cf66875098e00a1be2b7
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./output/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 150,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2719,6 +2719,119 @@
|
|
2719 |
"eval_samples_per_second": 8.968,
|
2720 |
"eval_steps_per_second": 8.968,
|
2721 |
"step": 3600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2722 |
}
|
2723 |
],
|
2724 |
"logging_steps": 10,
|
@@ -2738,7 +2851,7 @@
|
|
2738 |
"attributes": {}
|
2739 |
}
|
2740 |
},
|
2741 |
-
"total_flos": 1.
|
2742 |
"train_batch_size": 16,
|
2743 |
"trial_name": null,
|
2744 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.7807769775390625,
|
3 |
+
"best_model_checkpoint": "./output/checkpoint-3750",
|
4 |
+
"epoch": 0.46618597712580806,
|
5 |
"eval_steps": 150,
|
6 |
+
"global_step": 3750,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2719 |
"eval_samples_per_second": 8.968,
|
2720 |
"eval_steps_per_second": 8.968,
|
2721 |
"step": 3600
|
2722 |
+
},
|
2723 |
+
{
|
2724 |
+
"epoch": 0.44878170064644457,
|
2725 |
+
"grad_norm": 1.402942419052124,
|
2726 |
+
"learning_rate": 2.3219400131992273e-05,
|
2727 |
+
"loss": 0.6078,
|
2728 |
+
"step": 3610
|
2729 |
+
},
|
2730 |
+
{
|
2731 |
+
"epoch": 0.4500248632521134,
|
2732 |
+
"grad_norm": 0.9987258315086365,
|
2733 |
+
"learning_rate": 2.2908527551764404e-05,
|
2734 |
+
"loss": 0.5269,
|
2735 |
+
"step": 3620
|
2736 |
+
},
|
2737 |
+
{
|
2738 |
+
"epoch": 0.4512680258577822,
|
2739 |
+
"grad_norm": 1.655179500579834,
|
2740 |
+
"learning_rate": 2.259928242189966e-05,
|
2741 |
+
"loss": 0.5955,
|
2742 |
+
"step": 3630
|
2743 |
+
},
|
2744 |
+
{
|
2745 |
+
"epoch": 0.452511188463451,
|
2746 |
+
"grad_norm": 1.3401436805725098,
|
2747 |
+
"learning_rate": 2.2291677454254136e-05,
|
2748 |
+
"loss": 0.6061,
|
2749 |
+
"step": 3640
|
2750 |
+
},
|
2751 |
+
{
|
2752 |
+
"epoch": 0.4537543510691198,
|
2753 |
+
"grad_norm": 1.766615629196167,
|
2754 |
+
"learning_rate": 2.1985725293263237e-05,
|
2755 |
+
"loss": 0.6185,
|
2756 |
+
"step": 3650
|
2757 |
+
},
|
2758 |
+
{
|
2759 |
+
"epoch": 0.4549975136747887,
|
2760 |
+
"grad_norm": 1.7541744709014893,
|
2761 |
+
"learning_rate": 2.1681438515421953e-05,
|
2762 |
+
"loss": 0.5724,
|
2763 |
+
"step": 3660
|
2764 |
+
},
|
2765 |
+
{
|
2766 |
+
"epoch": 0.4562406762804575,
|
2767 |
+
"grad_norm": 1.9028109312057495,
|
2768 |
+
"learning_rate": 2.1378829628767965e-05,
|
2769 |
+
"loss": 0.5688,
|
2770 |
+
"step": 3670
|
2771 |
+
},
|
2772 |
+
{
|
2773 |
+
"epoch": 0.4574838388861263,
|
2774 |
+
"grad_norm": 1.54623281955719,
|
2775 |
+
"learning_rate": 2.1077911072367317e-05,
|
2776 |
+
"loss": 0.6044,
|
2777 |
+
"step": 3680
|
2778 |
+
},
|
2779 |
+
{
|
2780 |
+
"epoch": 0.4587270014917951,
|
2781 |
+
"grad_norm": 1.4844456911087036,
|
2782 |
+
"learning_rate": 2.077869521580325e-05,
|
2783 |
+
"loss": 0.5635,
|
2784 |
+
"step": 3690
|
2785 |
+
},
|
2786 |
+
{
|
2787 |
+
"epoch": 0.45997016409746394,
|
2788 |
+
"grad_norm": 1.464686632156372,
|
2789 |
+
"learning_rate": 2.0481194358667695e-05,
|
2790 |
+
"loss": 0.5237,
|
2791 |
+
"step": 3700
|
2792 |
+
},
|
2793 |
+
{
|
2794 |
+
"epoch": 0.46121332670313275,
|
2795 |
+
"grad_norm": 1.3379572629928589,
|
2796 |
+
"learning_rate": 2.018542073005567e-05,
|
2797 |
+
"loss": 0.5913,
|
2798 |
+
"step": 3710
|
2799 |
+
},
|
2800 |
+
{
|
2801 |
+
"epoch": 0.46245648930880157,
|
2802 |
+
"grad_norm": 1.292743444442749,
|
2803 |
+
"learning_rate": 1.9891386488062538e-05,
|
2804 |
+
"loss": 0.5878,
|
2805 |
+
"step": 3720
|
2806 |
+
},
|
2807 |
+
{
|
2808 |
+
"epoch": 0.46369965191447043,
|
2809 |
+
"grad_norm": 1.7692592144012451,
|
2810 |
+
"learning_rate": 1.959910371928436e-05,
|
2811 |
+
"loss": 0.5772,
|
2812 |
+
"step": 3730
|
2813 |
+
},
|
2814 |
+
{
|
2815 |
+
"epoch": 0.46494281452013925,
|
2816 |
+
"grad_norm": 1.5741891860961914,
|
2817 |
+
"learning_rate": 1.930858443832096e-05,
|
2818 |
+
"loss": 0.5899,
|
2819 |
+
"step": 3740
|
2820 |
+
},
|
2821 |
+
{
|
2822 |
+
"epoch": 0.46618597712580806,
|
2823 |
+
"grad_norm": 2.228027105331421,
|
2824 |
+
"learning_rate": 1.90198405872821e-05,
|
2825 |
+
"loss": 0.6182,
|
2826 |
+
"step": 3750
|
2827 |
+
},
|
2828 |
+
{
|
2829 |
+
"epoch": 0.46618597712580806,
|
2830 |
+
"eval_loss": 0.7807769775390625,
|
2831 |
+
"eval_runtime": 54.8116,
|
2832 |
+
"eval_samples_per_second": 9.122,
|
2833 |
+
"eval_steps_per_second": 9.122,
|
2834 |
+
"step": 3750
|
2835 |
}
|
2836 |
],
|
2837 |
"logging_steps": 10,
|
|
|
2851 |
"attributes": {}
|
2852 |
}
|
2853 |
},
|
2854 |
+
"total_flos": 1.3044092846143488e+17,
|
2855 |
"train_batch_size": 16,
|
2856 |
"trial_name": null,
|
2857 |
"trial_params": null
|