diff --git a/.gitattributes b/.gitattributes index 1e200d8fe2278920e36df1a4cd45bca5fab5e571..cdca914470d4be12c65840d301946cf56cba2927 100644 --- a/.gitattributes +++ b/.gitattributes @@ -465,3 +465,51 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text 619m22b22b/evaluation/generation/examples.619m22b22b_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text 619m22b4b8/evaluation/generation/examples.619m22b4b8_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text 619m22b4b8/evaluation/generation/examples.619m22b4b8_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text +83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c2d47bc191682e590d0a4315b842844a86c7c44 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd453915033abd4d586a44264b5d89df1c4fd960cece0e4d0c3e2f815313324c +size 15518743 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..40b95f076733929bd8b6b7bcbfcb6e7124f661ae --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:689c8aa7ad63887b2786da95fe9d21aaba3f427153c6d6a0a44bcf6615c7fb34 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9cfbbde6982975a91d7ec81127afc515dc18d8b --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4187a2a1589dde01d88bd29ac739470e5d5a86f4e0127be5ab7a8e3ebfef1543 +size 15518626 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f29dbd1849d41a5c062385ed666c7becdb7bc76a --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3bf67040d792eb95b8b2ff3e1747f187dd399e23b39c1205edaa9fc289a5e4 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f50580d7c7fbf2842448c87113a59e6236cca401 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e392dc2d54d577ac1858e5e638175a42473a5a48bb05dd99ac5b6efc1ddafcd7 +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ccf860d45d0bdf84a1b65eae7983521a1965330b --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d12f817fcc2ff7083dde92b3ad1b667dc35d37d624596c38baf9e8fce938111 +size 15518626 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..772f31f67b243fede02d435d0e42857a87ad83e6 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd4c791af4e6a78a02ff85d97326fd0b1b28b3d689624257b78c95c4f197c1e5 +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..16a65f0d8159fbc1d35319d8309634ad69c15175 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:885ddefdc3cf6db3c7c63ca408ee6952528d20dfafcb1fc5093ab67d5cdf3c49 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..067654b1d63d8dd91e1aaf559e80164cd5b7de62 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c4be22641c44fdcf91f9fbdaa5cdc6d8cd3de8510746037b35fc386307b898 +size 15518626 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2a9f1db7d1b290c0b567468ffcfcc4009643cc0 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:510306602ba6c04b8051d19bbef25c2bef6fcf19dbd1ddf48c4de94d0a4497ca +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9704c90e0d0bc56e250309763485eae8757c3d1a --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49466f2417ee9b8499dcd08b9ab05d354e00637be196f5d63d27e0725d36a43 +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c150611d2173ef1c9d033721815665549240ff0d --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7166f8921b094bace354f3e24042aa53259584554ca53eab5c1241bc87c26fc4 +size 15518615 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5686d733c67a8e707b19ffcc7f6a6e207bcb4cc --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a7e8ffbc0f2ef3d2b4d67b47a9cb5c7688dc7470fbf1e3414033f908121add2 +size 15518626 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..33755568fb6f1ff6eefd8fcc219608d11d42ef49 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c240dc831f49331bb3fa1ae288f88e0e004eb54f3ff4dffb77127f3e36a86d26 +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..71aa7e658910644e7e501547b7256ed608223a96 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a29bbfe6633775b5f67e8291e8962e2d0a566170f53a6d80827f751de16ccc8 +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b4a7e82018828a52819b12c1e9632e1070676a3 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69d9c70626d6b7a01c35db9161ddfb41af017ea88ebf531ba4b8cc5382cdce53 +size 15518626 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b5a77ab8dc75d8812b62e97d69d40f398431e18 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3811325a2f0f651ad2475f6a39bc515de4731a84a9fb49ec8251bbea1166ec0 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd691700c2eb95a4887d66661c818437443fa41c --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89f1b5cb93df9a1fdac38f763f6f2a95ec0d9004e03c228fd7129262f309e512 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2548f9c1fa207712f85969ecb8f09447e6da46a8 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf660b3550b02d39ee29b4c52bedc4c74ae3bdc6498faad0dd025aa68c354adf +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..29ed42dbf16fa3bc8dff0abb2df53a88e436d8ac --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5a408e576b945f2d8184f208c3430b08c6904ab3ae6b45314df4a093fe0ee78 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9df3413fa58a3ebedab507ac3abea9ff6d5827fa --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:903adfc35ebd14cf705b0fca7537e31d45a1f86587eac2f9daa643b3f736cf57 +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f76ac843f101183ba6f34ed5782c8dc898aec2c --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01915aa50dadfc3becf62855c51261db732c534718a9b164fd289422600911be +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..82ac8b8816f78bffa128763873ec19514f72f1b5 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d99946dca150404f54c8593dab7157b106a7421d6fceaf7eea5ebf942d49d54 +size 15518743 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6021f075618fba499bc729320a3fd23dd8a5926 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d42d742e4834d39c817bbd63cfe0100295c757bdd2f5d82d6604afd55eba9b0e +size 15518626 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..06b1787b53814faa54d45e3c303575ff56e9172e --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49dca42c7e2136c259c62a5c784ea7141b93daf8d6a4de926b7745b56c4314f3 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9cc5d9f54aaeb1715aede12cc7c0c361cb46ec25 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:663934bb4e328831990f8e9f0ebe95c4d09fe06d5f69435454cdea2d32317ca9 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8873f7ef930e38d9dd4303149aeda77b68cb28af --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4f4d6ae38ea014f3b1ae2f282adc6ecd8be7cfe6d1a6218a22aea0a7a1ceaf6 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d544e6973ba621c4a906af9dc9a8b0abaae4198e --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:800e1bc98cbccf0723d8248061d626e7b49efc1bb4505d4fd3ae915bfa056642 +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..09ce2b0ea78d91690eb28f9e6465d86c79dfaf27 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b99ae89e22bd54083d81c4690e2f75b1c8e32fb7856f79fa5e01f517b8153e6f +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..27af7230a7486c6a49b71b5b95fd15e4ee6a874e --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce3e5131b3b3d75c5ac93d3e69242de9dbc97407d87831a399af98ae953aac52 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..105db94e095af8657d2194b70ab25266fef1b298 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9be4e32f28e2e56798f729bc58d5e2430f8ddf107755e6be57194915f1ff4b5 +size 15518818 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b97dcf06e9fa40cd756baa80ddbcc681b389241e --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:808bbe2cb1743736fb53bb9dc17c7c27c03b528bea5b347e07b2abd3b8ded8c2 +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..65a3232ff8e9842d215152a80aeb2ceb797842c7 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0238593143f8e31b63c17284e7df6d5877a616815b9a129f1ece271de54420b +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..08484dbb1e0f9066219911436604b4e75379e253 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bf1779115e8b68c3bd8d80ec2d8496717c95cf683844ae1966928c20d682616 +size 15518743 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..689d591e1631781a2be2cd8ab80405713ed28a08 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7035bd600825cf519e4ea36d89fa71bf08c7b21359eebef671cf6e747e361128 +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..27bcddf9d251b1e7396d010a54ea3e79ae663d39 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d048a7daed611f2624a6ed6b5018972e8bc474d9ccb3deec5f654ea860c44486 +size 15518818 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a94945f79394172a0131dd13e8f19bd04adc43a7 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bd4e5c38e5347bb6ded6e58df001116e14d61bb57718f888fbbf8114c7bc25f +size 15518626 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed2a293185288e57e8de1cca50c6a5073bddc3db --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3743316ec0107e01f3cef3bb9999626a4be84026790e0dd66035c6e4c26c6ad1 +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd4e7048d5d7572588180f29caf3e0683d05e663 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d17cc4191f8dbbc43fcd51297cf303d1edc3a3401ec2d0ca27e5a2c7b056ca22 +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..faf2a8b6fc6b67222065e07347f759af6391d3cb --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe91aa36448951452627b5a504cc82e49989b7db1ab162c8c75664292d5d3e9f +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..33e8f8eb2b98691aba2c6769acfab8d0258fc41f --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39c6de42768a7b6c4418a567ac1ea8859099c95e9ef182334f4c386d6f799236 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..41ef769b6b2a22b0a344740eb6d86ea604b524d7 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26cfe26dad94b4ad5d5c3ca656e79414a282f49fa732a216423c5fa8a0312b46 +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5e7b411d6db436a866c09e78cd94b1e272ff55e --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de48d5372b18ce5a007fa882aa217cd00ec2baefddaf545da7a7e493fc5ffd7d +size 15518562 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b51c5231610732654fedadbf0047f865bfa1bcd0 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e3ac575307411150838bd24216dfd4a7b9d978ce7bc78fbaf0a3b8ba30a82de +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..418f269d0747ff65c1e3071228128bd3dae456d2 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e483460473aab3dc0fba99591a9fd0255a19d61d2fc55c8228ffe2ef63f9acda +size 15518615 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fbd7737210122c31b03739246a2e4acfae87211 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eadb704978de876d79fa5bfd19a2358da1adaee00b99e206ab9fcfcf0e97245 +size 15518818 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d83173c62bc6b566cc8d4f1f23e8724839f64376 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2d0ef6037d9497ebdf81b149cc81140efb740178fc6992b2ec5fb78eaad33b +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..08f104b121fd04e5ad3ea8ce097982c472279344 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e60ffe709659c4edc8100e927780d0272340e62ae35ac6b31728d27593ed406b +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e7b7b8618f61ac9ffeb458fc5af4280a9a45b67 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f85c1c8abae81fc8562998408b8762f5745d7d5d2e55599531c653cd5bd16a38 +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7066641d77defa8c3e5ce87f30eb25bddd4a7bb --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d036a34a7dfb33531917e4640ea58386f0a3ed547366e21cb2940b2069d43f18 +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a191792b7710467eeb5d0b76749975b94cee2a4 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:611d35c9a478523d5713fadfee196e833dcced58dcc134259e15e45891d09a3f +size 15518626 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d876df9c47cfc755f3a2240137974c2afac0d261 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e40e59ce4aeac26bba56472465e234cd4d90da860ae7e95f0b08df93089dc3e4 +size 15518818 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecdb3a603fbcd89c90fd83d46e068142aebef750 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53c2d5ffcd577c10e46260211327c8e112b6fb20698239077d2c33c45efce313 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5aa586d220066b83d7af91ae7014ad78640763b --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1efc7b45db1c0262a13ab7c3fb749fa6f71184a5912bf06731c3ba313dff46d2 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4af17d0b22ecb3b27f326fcacc9a23b267c6f0c7 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1839a167c230dde158a1fb23bb50d66c6738f50a79b286c34d5255e73a2fe71f +size 15518754 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..55f0ca27169a59ed8ae39d8bf5fa3919eaccefca --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480360bf3386b0a265f09b94d02e7aeee307a18f673662a347a8418ff879c2fe +size 15518679 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e65982171daac5815c95e68885c1266b7364fdb --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecd6f0ae034ce14df1efc0944168a9397a765ae4a2792f79e255fcdb34bfa41f +size 15518818 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9390704878ef930709cce8e7f5ef17380b5f69d5 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7bf5a2918e5b7f1a8c110dfc2838e280f85ca1c826168889eac568a28ad1539 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f52e7453198ae78e3ff250aa417eead3dc78fb0 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81e9eab7e6ff27dd7fc6689ae3cabe63b3a041b78554accd94df11018250d802 +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..320e02f712b6e8263e707a55425fb3a9c5e3dd4d --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77d626bb731cf4f12eead8ee79e803089d5164eade98d7fe871e377c733ed0bf +size 15518690 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e064ec5f61ec8d8a91477979b9ed1a28b235700e --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:448a4737940f7a0da778ae03d6102c65fd5e57dfc7bffaf235c37819b157981c +size 15518743 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..134cdb06f0471db118aaaaa0f3027b6a12357357 --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a623a79531e2581337c5965434a24c51e6e6a437bd9ab56b93790489c5c55c8c +size 15518679 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7c63b634f6db702051262ab3c7419fe3fdc546c --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05bc9e4acbdcc001b97eff427c91d7087b51577a8e85926b6a72dacda91f47d4 +size 15518679 diff --git a/83m20b100m/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/83m20b100m/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6e52d56a5e140c090e251855f7e2ce6f300718f --- /dev/null +++ b/83m20b100m/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b3bb1c5fe8078dd9eb05a0f067171c804bd4ba0bab3c7ca941853c62d0c0213 +size 15518743 diff --git a/83m20b100m/global_step37905/layer_01-model_00-model_states.pt b/83m20b100m/global_step37905/layer_01-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e17ef9d2854578126dd57331504d2bcfcbbae9fe --- /dev/null +++ b/83m20b100m/global_step37905/layer_01-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb7c4aad6ab110aaa7bb481e78e4b94614a103ecdc808a485c9e5aa0d6348cd4 +size 67011843 diff --git a/83m20b100m/global_step37905/layer_03-model_00-model_states.pt b/83m20b100m/global_step37905/layer_03-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e81d2dacd55d073a4023a75ffba7cf951c9eeb25 --- /dev/null +++ b/83m20b100m/global_step37905/layer_03-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b7bbab364b67c2642beea0eaaae798b64cb91ff89d2bddde820a7f46e2af1e8 +size 9851395 diff --git a/83m20b100m/global_step37905/layer_04-model_00-model_states.pt b/83m20b100m/global_step37905/layer_04-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..36d4a4c00931c9bd3f66ee795083e7364b163d0a --- /dev/null +++ b/83m20b100m/global_step37905/layer_04-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:241088b218154d8883a12ba802fe99fe18ce8e015da43028ef64a00f30f21e46 +size 9851395 diff --git a/83m20b100m/global_step37905/layer_05-model_00-model_states.pt b/83m20b100m/global_step37905/layer_05-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0ac51acdc00b5b91dfc2e385f056387da61d1d5 --- /dev/null +++ b/83m20b100m/global_step37905/layer_05-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c49a9060e08c5f618a4903f459831c02685fba9ce3f75416ade424195b4bdabe +size 9851395 diff --git a/83m20b100m/global_step37905/layer_06-model_00-model_states.pt b/83m20b100m/global_step37905/layer_06-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f07fecb0f0be96889969aa1cc6542cb2f31baa94 --- /dev/null +++ b/83m20b100m/global_step37905/layer_06-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6b37c7ec5b40acaacfa76bedbfafb53df87e0cfe05b8348bdd3992d8dad5bb9 +size 9851395 diff --git a/83m20b100m/global_step37905/layer_07-model_00-model_states.pt b/83m20b100m/global_step37905/layer_07-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..051bb37c5da417634edec9f724061ba6abc05a7c --- /dev/null +++ b/83m20b100m/global_step37905/layer_07-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:361d25b690385ce398ed0eb84ff287963678cae063b33edc9e76fe3fecabc8be +size 9851395 diff --git a/83m20b100m/global_step37905/layer_08-model_00-model_states.pt b/83m20b100m/global_step37905/layer_08-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..837b5b9aa93da1a6841a44fefa0fc5dd7ba91836 --- /dev/null +++ b/83m20b100m/global_step37905/layer_08-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200215b2de32409e2322417963db51ecabae5570492ffb84b56c7e0ef1f17574 +size 9851395 diff --git a/83m20b100m/global_step37905/layer_09-model_00-model_states.pt b/83m20b100m/global_step37905/layer_09-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0aaff772c4dbbafed8364902e9d66cd8a52bf6c8 --- /dev/null +++ b/83m20b100m/global_step37905/layer_09-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47873b5e28e00576b50e94afc874417baedb791936ac8b37fcc49debf672ce41 +size 9851395 diff --git a/83m20b100m/global_step37905/layer_10-model_00-model_states.pt b/83m20b100m/global_step37905/layer_10-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c282fc35fb887b1bf1721b896f1d425c8bdd86c --- /dev/null +++ b/83m20b100m/global_step37905/layer_10-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b26360aed99a071a2cb733fa420fd5ce652a150a1e2fe702b825a30283e29de +size 9851395 diff --git a/83m20b100m/global_step37905/layer_11-model_00-model_states.pt b/83m20b100m/global_step37905/layer_11-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b40c41f867eccc4db12fbd082793905c6e3a744 --- /dev/null +++ b/83m20b100m/global_step37905/layer_11-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c9085777316571634aaaec06dcee46af8c503b2016ee5da1589481c19aecc37 +size 9851395 diff --git a/83m20b100m/global_step37905/layer_12-model_00-model_states.pt b/83m20b100m/global_step37905/layer_12-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3f1bdfc9bb6ae3e49b8acb9802fb166753d2d39 --- /dev/null +++ b/83m20b100m/global_step37905/layer_12-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14434e8ace924fcf0642903fb2a645eb266190a41b1197ffa57fe02821037133 +size 9851395 diff --git a/83m20b100m/global_step37905/layer_14-model_00-model_states.pt b/83m20b100m/global_step37905/layer_14-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..af8fb9d5b21c593a526dfcda5f9d807c063960c7 --- /dev/null +++ b/83m20b100m/global_step37905/layer_14-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:204f6cb30e0f8d74e914c20a12b83c995a4c0dcb360c281035fbee2b715de9e2 +size 3779 diff --git a/83m20b100m/global_step37905/mp_rank_00_model_states.pt b/83m20b100m/global_step37905/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e22e04f4b492a3b53ab82806839bcde6393c7fc1 --- /dev/null +++ b/83m20b100m/global_step37905/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c487b9bcb6cf4609a2ff8cd1f06c26824d402c1cd356399888cd737d04b7fe41 +size 31603 diff --git a/83m20b100m/sbatch_83m20b100m.sh b/83m20b100m/sbatch_83m20b100m.sh new file mode 100644 index 0000000000000000000000000000000000000000..2199cf74f51c3edc740c70170858f3c6fd5235c4 --- /dev/null +++ b/83m20b100m/sbatch_83m20b100m.sh @@ -0,0 +1,168 @@ +#!/bin/bash +#SBATCH --exclude=nid007571,nid007112,nid006774,nid007502,nid007506,nid007507,nid005145,nid006692,nid007218,nid007123,nid006124,nid006123,nid007496,nid007237,nid006852,nid007206,nid006947,nid007212,nid006977,nid007222,nid005444,nid007219,nid007493,nid007221,nid005300,nid005619,nid006118,nid005203,nid006113,nid006481,nid007077,nid005208,nid005207,nid005879,nid005901 +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 2-0:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=83m20b100m + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT +mkdir -p $CHECKPOINT_PATH +mkdir -p $TENSORBOARD_PATH + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train100m.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_1B5_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_74M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 19873180000 +# -> Samples: 9703701 +TRAIN_SAMPLES=9_703_701 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 97_037 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --loss-scale 12 \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + --checkpoint-activations \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/83m20b100m/sbatch_83m20b100mval.sh b/83m20b100m/sbatch_83m20b100mval.sh new file mode 100644 index 0000000000000000000000000000000000000000..8f52874f91fab95713ef0b90c276a295f1be5d59 --- /dev/null +++ b/83m20b100m/sbatch_83m20b100mval.sh @@ -0,0 +1,173 @@ +#!/bin/bash +#SBATCH --exclude=nid007571,nid007112,nid006774,nid007502,nid007506,nid007507,nid005145,nid006692,nid007218,nid007123,nid006124,nid006123,nid007496,nid007237,nid006852,nid007206,nid006947,nid007212,nid006977,nid007222,nid005444,nid007219,nid007493,nid007221,nid005300,nid005619,nid006118,nid005203,nid006113,nid006481,nid007077,nid005208,nid005207,nid005879,nid005901 +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 2-0:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=83m20b100mval +VARIANT_CKPT=83m20b100m + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT_CKPT +TENSORBOARD_PATH=tensorboard_$VARIANT +mkdir -p $CHECKPOINT_PATH +mkdir -p $TENSORBOARD_PATH + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train20b.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_74M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 19873180000 +# -> Samples: 9703701 +TRAIN_SAMPLES=1 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 0 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + --no-load-optim \ + --reset-progress \ + --override-lr-scheduler \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --loss-scale 12 \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + --checkpoint-activations \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1 \ + --eval-iters 100 \ + --eval-only true \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/83m20b100m/tensorboard_83m20b100m/events.out.tfevents.1678742299.nid006591.123401.0 b/83m20b100m/tensorboard_83m20b100m/events.out.tfevents.1678742299.nid006591.123401.0 new file mode 100644 index 0000000000000000000000000000000000000000..0869544375b6aa261ff1eb7f3c59bf15106a383a --- /dev/null +++ b/83m20b100m/tensorboard_83m20b100m/events.out.tfevents.1678742299.nid006591.123401.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de34f45c2c8fb71dd1f154b00c97dd7e5e689ccf9efe4c410e1db9bd38937350 +size 67726214 diff --git a/83m20b100m/tensorboard_83m20b100mval/events.out.tfevents.1678780596.nid006591.97425.0 b/83m20b100m/tensorboard_83m20b100mval/events.out.tfevents.1678780596.nid006591.97425.0 new file mode 100644 index 0000000000000000000000000000000000000000..5310f82eceabd52ae0e1143bbfba8b91415770bf --- /dev/null +++ b/83m20b100m/tensorboard_83m20b100mval/events.out.tfevents.1678780596.nid006591.97425.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7ada5ef4c72a37b553b0e11f1d8ed92c419752534e044d4af5d34514c48a2ea +size 980 diff --git a/83m20b1b5/2809866.err b/83m20b1b5/2809866.err new file mode 100644 index 0000000000000000000000000000000000000000..6c9e8ce8d5ecfc50a67b7e932ef399e82176c436 --- /dev/null +++ b/83m20b1b5/2809866.err @@ -0,0 +1,1113 @@ +3: 2023-02-05 01:56:45.552917: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.552937: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.552939: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.552961: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.552968: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.552997: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.553002: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.553011: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.555220: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.555228: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.555278: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.555309: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.555310: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.555314: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.555316: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.555410: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.555611: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.555628: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.555648: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.555662: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.555683: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.555685: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.555695: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.555739: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.555826: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.555839: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.555841: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.555872: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.555883: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.556148: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.556144: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.556182: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: 2023-02-05 01:56:45.555887: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.555864: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.555901: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.556210: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.556220: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.556318: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.556348: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.556357: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-05 01:56:45.556647: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-05 01:56:45.556666: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-05 01:56:45.556677: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-05 01:56:45.556699: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-05 01:56:45.556710: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-05 01:56:45.556726: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-05 01:56:45.556736: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-05 01:56:45.556772: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.556998: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.556990: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.557001: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.557060: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.557079: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.557055: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.557082: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.557089: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.557255: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.557278: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.557324: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.557336: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.557341: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.557355: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.557364: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.557399: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:57.485166: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:56:57.485195: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:56:57.485214: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:56:57.485228: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:56:57.485240: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:56:57.485293: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-02-05 01:56:57.485267: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:56:57.485251: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:56:57.485238: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-05 01:56:57.485323: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:56:57.485338: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-05 01:56:57.485302: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:56:57.485345: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:56:57.485371: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:56:57.485365: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:56:57.485352: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:56:57.485366: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-05 01:56:57.485368: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:56:57.485376: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:56:57.485395: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.485482: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-05 01:56:57.485421: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:56:57.485550: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-05 01:56:57.485355: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:56:57.485428: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-02-05 01:56:57.486094: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:56:57.486114: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:56:57.485434: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-02-05 01:56:57.486122: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:56:57.486135: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:56:57.486142: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:56:57.486144: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.485607: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-02-05 01:56:57.486148: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:56:57.486152: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.485508: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-05 01:56:57.486178: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.485579: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 2023-02-05 01:56:57.485499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-05 01:56:57.485519: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.485893: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.485603: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-02-05 01:56:57.485550: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-05 01:56:57.486200: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.485918: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.485658: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-02-05 01:56:57.485543: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-05 01:56:57.486214: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-05 01:56:57.486223: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-05 01:56:57.486232: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-05 01:56:57.486238: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-05 01:56:57.486241: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:56:57.485588: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 2023-02-05 01:56:57.485523: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:56:57.486253: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.485662: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:56:57.485542: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-02-05 01:56:57.485566: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.485523: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.485959: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.485672: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 2023-02-05 01:56:57.485621: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:56:57.485595: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-02-05 01:56:57.485589: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.485538: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:56:57.485783: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 2023-02-05 01:56:57.485636: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:56:57.485624: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-02-05 01:56:57.485533: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-05 01:56:57.486356: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.485547: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.485977: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.485645: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:56:57.485560: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-02-05 01:56:57.485580: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.485551: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.485986: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-05 01:56:57.486017: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-05 01:56:57.486022: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-05 01:56:57.486026: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486428: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486442: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.485664: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:56:57.485633: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-05 01:56:57.486420: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-05 01:56:57.486425: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-05 01:56:57.486441: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.485553: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:56:57.486462: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486480: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486491: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.485683: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:56:57.485615: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-05 01:56:57.486481: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-05 01:56:57.486482: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486502: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486511: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.485533: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:56:57.486489: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486514: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.485684: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:56:57.485623: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-05 01:56:57.486537: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.486423: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:56:57.485688: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 2023-02-05 01:56:57.486440: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-05 01:56:57.486450: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.486454: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-05 01:56:57.486468: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486459: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:56:57.486476: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:56:57.486479: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-05 01:56:57.486479: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-05 01:56:57.486496: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-05 01:56:57.486501: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486477: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486488: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:56:57.486483: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:56:57.486486: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-05 01:56:57.486506: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-05 01:56:57.486510: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486513: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486509: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486515: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:56:57.486499: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-05 01:56:57.486533: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486514: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486530: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:57:23.201010: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 2023-02-05 01:57:23.201032: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-05 01:57:23.201023: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.201006: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.201055: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-05 01:57:23.201158: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.201226: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 2023-02-05 01:57:23.201064: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.201032: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-02-05 01:57:23.201277: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.201190: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.201075: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-05 01:57:23.201067: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.201040: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.201201: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.201288: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-02-05 01:57:23.201310: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 2023-02-05 01:57:23.201099: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-05 01:57:23.201087: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.201046: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.201220: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.201430: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-05 01:57:23.201287: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.201116: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-05 01:57:23.201101: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.201056: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.201320: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.201232: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.201454: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-05 01:57:23.201302: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.201125: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-05 01:57:23.201108: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 2023-02-05 01:57:23.201062: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.201341: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.201479: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-05 01:57:23.201318: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.201113: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 2023-02-05 01:57:23.201066: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.201353: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.201491: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-05 01:57:23.201343: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.201133: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-05 01:57:23.201244: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.201368: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 2023-02-05 01:57:23.201149: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.201505: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-05 01:57:23.201354: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.201250: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.201373: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 2023-02-05 01:57:23.201163: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.201513: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-05 01:57:23.201378: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.201270: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.201563: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.201527: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.201528: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.227540: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.227551: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.227590: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.227588: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.227606: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.227621: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.227641: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.227663: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230545: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-05 01:57:23.230526: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-05 01:57:23.230525: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230548: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-05 01:57:23.230528: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-05 01:57:23.230531: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230558: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-05 01:57:23.230528: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-05 01:57:23.230528: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230568: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230568: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-05 01:57:23.230530: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-05 01:57:23.230532: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-05 01:57:23.230566: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230574: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-05 01:57:23.230535: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-05 01:57:23.230536: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-05 01:57:23.230572: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.230541: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.230533: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-05 01:57:23.230531: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-05 01:57:23.230573: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: 2023-02-05 01:57:23.230716: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.230534: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-05 01:57:23.230535: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-05 01:57:23.230576: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: 2023-02-05 01:57:23.230718: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.230545: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-05 01:57:23.230545: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.230546: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230582: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-05 01:57:23.230587: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.230544: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-05 01:57:23.230549: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-05 01:57:23.230550: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-05 01:57:23.230539: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-05 01:57:23.230590: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-05 01:57:23.230591: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230721: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-05 01:57:23.230550: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-05 01:57:23.230553: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.230551: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-05 01:57:23.230555: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-05 01:57:23.230654: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.230577: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-05 01:57:23.230555: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-05 01:57:23.230557: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230667: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.230858: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: 2023-02-05 01:57:23.230723: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.230598: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-05 01:57:23.230558: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-05 01:57:23.230561: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.230725: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-05 01:57:23.230862: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.230722: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-05 01:57:23.230857: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.230723: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-05 01:57:23.230860: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.230737: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230738: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.230875: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230738: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230741: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230742: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.230867: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: 2023-02-05 01:57:23.230744: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230746: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.230783: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-05 01:57:23.230866: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.230797: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.230870: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.230878: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.230868: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.230881: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.230885: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.230888: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.230888: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.230890: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.230893: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231185: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-02-05 01:57:23.231202: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.231212: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 2023-02-05 01:57:23.231193: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.231218: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.231214: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 2023-02-05 01:57:23.231188: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-05 01:57:23.231270: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.231216: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 2023-02-05 01:57:23.231189: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-05 01:57:23.231269: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.231225: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.231217: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 2023-02-05 01:57:23.231187: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-05 01:57:23.231273: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.231201: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.231220: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 2023-02-05 01:57:23.231197: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-05 01:57:23.231277: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.231222: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-05 01:57:23.231274: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.231222: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-05 01:57:23.231273: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.231238: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-05 01:57:23.231239: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.231241: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-05 01:57:23.231242: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-05 01:57:23.231245: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-05 01:57:23.231280: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-02-05 01:57:23.231245: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.231277: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.231289: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-05 01:57:23.231290: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-05 01:57:23.231290: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-05 01:57:23.231295: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-05 01:57:23.231296: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-05 01:57:23.231296: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-05 01:57:23.231298: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-05 01:57:23.231298: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.231195: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.231194: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.231210: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231214: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231215: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231212: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231216: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231219: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231219: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: Loading extension module scaled_upper_triang_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module fused_mix_prec_layer_norm_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module fused_mix_prec_layer_norm_cuda... +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +4: Successfully preprocessed all matching files. +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +0: Building extension module utils... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module utils... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: +1: +1: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +2: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +2: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: +3: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: +7: +0: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +0: Building extension module utils... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Loading extension module utils...Loading extension module utils... +0: Loading extension module utils... +0: +0: Loading extension module utils... +0: Loading extension module utils... +4: Loading extension module utils... +2: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +2: Loading extension module utils... +4: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +4: Loading extension module utils... +1: Loading extension module utils... +3: Loading extension module utils... +4: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +4: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +3: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +7: Loading extension module utils... +5: Loading extension module utils... +7: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +0: +0: Loading extension module utils...Loading extension module utils... +0: +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +7: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +4: +4: Loading extension module utils... +4: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +6: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +6: +6: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +6: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +7: +7: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +7: +7: +7: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +3: +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +2: +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +2: +2: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +5: +5: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +5: +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/utils.py:349: UserWarning: Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings +0: warnings.warn("Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings") diff --git a/83m20b1b5/2809866.out b/83m20b1b5/2809866.out new file mode 100644 index 0000000000000000000000000000000000000000..431792fa1604ad76a83b82f5c82dd818cc2f2e55 --- /dev/null +++ b/83m20b1b5/2809866.out @@ -0,0 +1,4385 @@ +Model parameters: d_model 640 ffw_size 2560 kv_size 64 n_heads 10 n_layers 10 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 1 --pipeline-model-parallel-size 1 --num-layers 10 --hidden-size 640 --num-attention-heads 10 --kv-channels 64 --ffn-hidden-size 2560 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 4 --global-batch-size 256 --train-samples 1 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --loss-scale 12 --clip-grad 1.0 --kill-switch-path kill-switch-83m20b1b5val --bf16 --checkpoint-activations --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 1 --lr-warmup-samples 0 --clip-grad 1.0 --weight-decay 1e-1 --no-load-optim --reset-progress --override-lr-scheduler --log-interval 10 --save-interval 1000 --eval-interval 1 --eval-iters 100 --eval-only true --tensorboard-dir tensorboard_83m20b1b5val --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save checkpoints_83m20b1b5 --load checkpoints_83m20b1b5 --train-weighted-split-paths-path train20b.txt --valid-weighted-split-paths-path val.txt --data-impl mmap --deepspeed --deepspeed_config ds_configs/2809866.json --zero-stage 0 +START 2809866: Sun Feb 5 01:55:08 EET 2023 +0: +0: +0: ======================= ROCm System Management Interface ======================= +0: ================================= Concise Info ================================= +0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +0: 0 42.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 2 40.0c 100.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 4 38.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 5 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 6 38.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: ================================================================================ +0: ============================= End of ROCm SMI Log ============================== +3: +3: +3: ======================= ROCm System Management Interface ======================= +3: ================================= Concise Info ================================= +3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +3: 0 48.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 2 45.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 4 44.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 5 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 6 41.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: ================================================================================ +3: ============================= End of ROCm SMI Log ============================== +1: +1: +1: ======================= ROCm System Management Interface ======================= +1: ================================= Concise Info ================================= +1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +1: 0 42.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 2 39.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 4 47.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 6 42.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 7 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: ================================================================================ +1: ============================= End of ROCm SMI Log ============================== +5: +5: +5: ======================= ROCm System Management Interface ======================= +5: ================================= Concise Info ================================= +5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +5: 0 40.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 2 40.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 3 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 4 41.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 6 44.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: ================================================================================ +5: ============================= End of ROCm SMI Log ============================== +4: +4: +4: ======================= ROCm System Management Interface ======================= +4: ================================= Concise Info ================================= +4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +4: 0 47.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 2 40.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 4 44.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 6 39.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 7 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: ================================================================================ +4: ============================= End of ROCm SMI Log ============================== +7: +7: +7: ======================= ROCm System Management Interface ======================= +7: ================================= Concise Info ================================= +7: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +7: 0 46.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 2 40.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 4 44.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 6 34.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: ================================================================================ +7: ============================= End of ROCm SMI Log ============================== +6: +6: +6: ======================= ROCm System Management Interface ======================= +6: ================================= Concise Info ================================= +6: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +6: 0 40.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 2 41.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 4 40.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 5 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 6 39.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: ================================================================================ +6: ============================= End of ROCm SMI Log ============================== +2: +2: +2: ======================= ROCm System Management Interface ======================= +2: ================================= Concise Info ================================= +2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +2: 0 43.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 2 40.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 3 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 4 40.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 6 45.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: ================================================================================ +2: ============================= End of ROCm SMI Log ============================== +1: Launching on nid006659 (1/8), master nid006658 port 9999, GPUs 8, CUDA: True +7: Launching on nid006665 (7/8), master nid006658 port 9999, GPUs 8, CUDA: True +3: Launching on nid006661 (3/8), master nid006658 port 9999, GPUs 8, CUDA: True +4: Launching on nid006662 (4/8), master nid006658 port 9999, GPUs 8, CUDA: True +6: Launching on nid006664 (6/8), master nid006658 port 9999, GPUs 8, CUDA: True +0: Launching on nid006658 (0/8), master nid006658 port 9999, GPUs 8, CUDA: True +5: Launching on nid006663 (5/8), master nid006658 port 9999, GPUs 8, CUDA: True +2: Launching on nid006660 (2/8), master nid006658 port 9999, GPUs 8, CUDA: True +0: using world size: 64, data-parallel-size: 64, tensor-model-parallel size: 1, pipeline-model-parallel size: 1 +0: accumulate and all-reduce gradients in fp32 for bfloat16 data type. +0: using torch.bfloat16 for parameters ... +0: ------------------------ arguments ------------------------ +0: abort_on_unmet_fused_kernel_constraints ......... False +0: accumulate_allreduce_grads_in_fp32 .............. True +0: adam_beta1 ...................................... 0.9 +0: adam_beta2 ...................................... 0.999 +0: adam_eps ........................................ 1e-08 +0: adlr_autoresume ................................. False +0: adlr_autoresume_interval ........................ 1000 +0: apply_query_key_layer_scaling ................... True +0: apply_residual_connection_post_layernorm ........ False +0: attention_dropout ............................... 0.1 +0: attention_softmax_in_fp32 ....................... False +0: bert_binary_head ................................ True +0: bert_load ....................................... None +0: bf16 ............................................ True +0: bias_dropout_fusion ............................. True +0: bias_gelu_fusion ................................ True +0: biencoder_projection_dim ........................ 0 +0: biencoder_shared_query_context_model ............ False +0: block_data_path ................................. None +0: checkpoint_activations .......................... True +0: checkpoint_in_cpu ............................... False +0: checkpoint_num_layers ........................... 1 +0: clip_grad ....................................... 1.0 +0: codecarbon_dir .................................. None +0: consumed_train_samples .......................... 0 +0: consumed_train_tokens ........................... 0 +0: consumed_valid_samples .......................... 0 +0: contigious_checkpointing ........................ False +0: cpu_optimizer ................................... False +0: cpu_torch_adam .................................. False +0: curriculum_learning ............................. False +0: data_impl ....................................... mmap +0: data_parallel_size .............................. 64 +0: data_path ....................................... None +0: dataloader_type ................................. single +0: DDP_impl ........................................ local +0: decoder_seq_length .............................. None +0: deepscale ....................................... False +0: deepscale_config ................................ None +0: deepspeed ....................................... True +0: deepspeed_activation_checkpointing .............. False +0: deepspeed_config ................................ ds_configs/2809866.json +0: deepspeed_mpi ................................... False +0: distribute_checkpointed_activations ............. False +0: distributed_backend ............................. nccl +0: embed_layernorm ................................. False +0: embedding_path .................................. None +0: encoder_seq_length .............................. 2048 +0: eod_mask_loss ................................... False +0: eval_interval ................................... 1 +0: eval_iters ...................................... 100 +0: eval_only ....................................... True +0: evidence_data_path .............................. None +0: exit_duration_in_mins ........................... None +0: exit_interval ................................... None +0: ffn_hidden_size ................................. 2560 +0: finetune ........................................ False +0: fp16 ............................................ False +0: fp16_lm_cross_entropy ........................... False +0: fp32_residual_connection ........................ False +0: gigaflos_no_embeds .............................. 0 +0: global_batch_size ............................... 256 +0: glu_activation .................................. None +0: hidden_dropout .................................. 0.1 +0: hidden_size ..................................... 640 +0: hysteresis ...................................... 2 +0: ict_head_size ................................... None +0: ict_load ........................................ None +0: img_dim ......................................... 224 +0: indexer_batch_size .............................. 128 +0: indexer_log_interval ............................ 1000 +0: inference ....................................... False +0: init_method_std ................................. 0.02 +0: init_method_xavier_uniform ...................... False +0: initial_loss_scale .............................. 4294967296 +0: kill_switch_path ................................ kill-switch-83m20b1b5val +0: kv_channels ..................................... 64 +0: layer_norm_fusion ............................... True +0: layernorm_epsilon ............................... 1e-05 +0: lazy_mpu_init ................................... None +0: load ............................................ checkpoints_83m20b1b5 +0: local_rank ...................................... None +0: log_batch_size_to_tensorboard ................... True +0: log_interval .................................... 10 +0: log_learning_rate_to_tensorboard ................ True +0: log_level ....................................... None +0: log_level_replica ............................... None +0: log_loss_scale_to_tensorboard ................... True +0: log_num_zeros_in_grad ........................... False +0: log_params_norm ................................. False +0: log_path ........................................ None +0: log_timers_to_tensorboard ....................... True +0: log_validation_ppl_to_tensorboard ............... True +0: loss_on_targets_only ............................ False +0: loss_scale ...................................... 12.0 +0: loss_scale_window ............................... 1000 +0: lr .............................................. 0.0002 +0: lr_decay_iters .................................. None +0: lr_decay_samples ................................ 1 +0: lr_decay_style .................................. cosine +0: lr_decay_tokens ................................. None +0: lr_warmup_fraction .............................. None +0: lr_warmup_iters ................................. 0 +0: lr_warmup_samples ............................... 0 +0: make_vocab_size_divisible_by .................... 128 +0: mask_prob ....................................... 0.15 +0: masked_softmax_fusion ........................... True +0: max_position_embeddings ......................... 2048 +0: mean_noise_span_length .......................... None +0: memory_centric_tiled_linear ..................... False +0: merge_file ...................................... gpt2/merges.txt +0: micro_batch_size ................................ 4 +0: min_loss_scale .................................. 1.0 +0: min_lr .......................................... 2e-05 +0: mmap_warmup ..................................... False +0: no_load_optim ................................... True +0: no_load_rng ..................................... None +0: no_save_optim ................................... None +0: no_save_rng ..................................... None +0: noise_density ................................... None +0: num_attention_heads ............................. 10 +0: num_channels .................................... 3 +0: num_classes ..................................... 1000 +0: num_layers ...................................... 10 +0: num_layers_per_virtual_pipeline_stage ........... None +0: num_workers ..................................... 2 +0: onnx_safe ....................................... None +0: openai_gelu ..................................... False +0: optimizer ....................................... adam +0: optimizer_fusion ................................ True +0: override_lr_scheduler ........................... True +0: pad_vocab_size_to ............................... None +0: params_dtype .................................... torch.bfloat16 +0: partition_activations ........................... False +0: patch_dim ....................................... 16 +0: pipeline_model_parallel_size .................... 1 +0: position_embedding_type ......................... PositionEmbeddingType.absolute +0: pp_partition_method ............................. None +0: profile_backward ................................ False +0: query_in_block_prob ............................. 0.1 +0: rampup_batch_size ............................... None +0: rank ............................................ 0 +0: remote_device ................................... none +0: reset_attention_mask ............................ False +0: reset_position_ids .............................. False +0: reset_progress .................................. True +0: retriever_report_topk_accuracies ................ [] +0: retriever_score_scaling ......................... False +0: retriever_seq_length ............................ 256 +0: reweight_loss_based_on_position_frequency ....... False +0: sample_rate ..................................... 1.0 +0: save ............................................ checkpoints_83m20b1b5 +0: save_interval ................................... 1000 +0: scatter_gather_tensors_in_pipeline .............. True +0: scattered_embeddings ............................ False +0: seed ............................................ 1234 +0: seq_length ...................................... 2048 +0: sgd_momentum .................................... 0.9 +0: short_seq_prob .................................. 0.1 +0: skip_train_iteration_range ...................... None +0: split ........................................... None +0: split_transformers .............................. False +0: sync_tp_duplicated_parameters ................... False +0: synchronize_each_layer .......................... False +0: tensor_model_parallel_size ...................... 1 +0: tensorboard_dir ................................. tensorboard_83m20b1b5val +0: tensorboard_log_interval ........................ 1 +0: tensorboard_queue_size .......................... 5 +0: test_weighted_split_paths ....................... None +0: test_weighted_split_paths_path .................. None +0: tile_factor ..................................... 1 +0: titles_data_path ................................ None +0: tokenizer_name_or_path .......................... None +0: tokenizer_type .................................. GPT2BPETokenizer +0: train_iters ..................................... None +0: train_samples ................................... 1 +0: train_tokens .................................... None +0: train_weighted_split_names ...................... ['train'] +0: train_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document']] +0: train_weighted_split_paths_path ................. None +0: train_weighted_split_splits ..................... [['0:1']] +0: train_weighted_split_weights .................... [['1.0']] +0: universal_checkpoint ............................ False +0: use_bnb_optimizer ............................... False +0: use_checkpoint_lr_scheduler ..................... False +0: use_contiguous_buffers_in_ddp ................... True +0: use_cpu_initialization .......................... None +0: use_one_sent_docs ............................... False +0: use_pin_memory .................................. False +0: valid_num_workers ............................... 2 +0: valid_weighted_split_names ...................... ['validation'] +0: valid_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document']] +0: valid_weighted_split_paths_path ................. None +0: valid_weighted_split_splits ..................... [['0:1']] +0: valid_weighted_split_weights .................... [['1.0']] +0: virtual_pipeline_model_parallel_size ............ None +0: vocab_extra_ids ................................. 0 +0: vocab_file ...................................... gpt2/vocab.json +0: weight_decay .................................... 0.1 +0: world_size ...................................... 64 +0: zero_allgather_bucket_size ...................... 0.0 +0: zero_contigious_gradients ....................... False +0: zero_reduce_bucket_size ......................... 0.0 +0: zero_reduce_scatter ............................. False +0: zero_stage ...................................... 0 +0: -------------------- end of arguments --------------------- +0: setting number of micro-batches to constant 1 +0: > building GPT2BPETokenizer tokenizer ... +0: > padded vocab (size: 50257) with 47 dummy tokens (new size: 50304) +0: DeepSpeed general environment info: +0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] +0: torch version .................... 1.13.0+rocm5.2 +0: torch cuda version ............... None +0: torch hip version ................ 5.2.21151-afdc89f8 +0: nvcc version ..................... None +0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] +0: deepspeed info ................... 0.7.5, unknown, unknown +0: deepspeed wheel compiled w. ...... torch 1.13, hip 5.1 +0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** +0: > initializing torch distributed ... +0: [2023-02-05 01:58:41,691] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +7: > setting tensorboard ... +0: > initializing tensor model parallel with size 1 +0: > initializing pipeline model parallel with size 1 +0: > setting random seeds to 1234 ... +0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 +0: > compiling dataset index builder ... +0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: make: Nothing to be done for 'default'. +0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: >>> done with dataset index builder. Compilation time: 0.117 seconds +0: > compiling and loading fused kernels ... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 102 +0: [1/1] c++ scaled_masked_softmax_hip.o scaled_masked_softmax_hip.cuda.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib -lamdhip64 -o scaled_masked_softmax_cuda.so +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 67 +0: ninja: no work to do. +0: >>> done with compiling and loading fused kernels. Compilation time: 23.945 seconds +0: time to initialize megatron (seconds): 76.556 +0: [after megatron is initialized] datetime: 2023-02-05 01:59:08 +0: building GPT model ... +0: [2023-02-05 01:59:08,686] [INFO] [utils.py:827:see_memory_usage] Before Building Model +0: [2023-02-05 01:59:08,687] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB +0: [2023-02-05 01:59:08,687] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 63.66 GB, percent = 12.6% +0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None +0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=1, model=0): 1, ProcessCoord(pipe=0, data=2, model=0): 2, ProcessCoord(pipe=0, data=3, model=0): 3, ProcessCoord(pipe=0, data=4, model=0): 4, ProcessCoord(pipe=0, data=5, model=0): 5, ProcessCoord(pipe=0, data=6, model=0): 6, ProcessCoord(pipe=0, data=7, model=0): 7, ProcessCoord(pipe=0, data=8, model=0): 8, ProcessCoord(pipe=0, data=9, model=0): 9, ProcessCoord(pipe=0, data=10, model=0): 10, ProcessCoord(pipe=0, data=11, model=0): 11, ProcessCoord(pipe=0, data=12, model=0): 12, ProcessCoord(pipe=0, data=13, model=0): 13, ProcessCoord(pipe=0, data=14, model=0): 14, ProcessCoord(pipe=0, data=15, model=0): 15, ProcessCoord(pipe=0, data=16, model=0): 16, ProcessCoord(pipe=0, data=17, model=0): 17, ProcessCoord(pipe=0, data=18, model=0): 18, ProcessCoord(pipe=0, data=19, model=0): 19, ProcessCoord(pipe=0, data=20, model=0): 20, ProcessCoord(pipe=0, data=21, model=0): 21, ProcessCoord(pipe=0, data=22, model=0): 22, ProcessCoord(pi +0: pe=0, data=23, model=0): 23, ProcessCoord(pipe=0, data=24, model=0): 24, ProcessCoord(pipe=0, data=25, model=0): 25, ProcessCoord(pipe=0, data=26, model=0): 26, ProcessCoord(pipe=0, data=27, model=0): 27, ProcessCoord(pipe=0, data=28, model=0): 28, ProcessCoord(pipe=0, data=29, model=0): 29, ProcessCoord(pipe=0, data=30, model=0): 30, ProcessCoord(pipe=0, data=31, model=0): 31, ProcessCoord(pipe=0, data=32, model=0): 32, ProcessCoord(pipe=0, data=33, model=0): 33, ProcessCoord(pipe=0, data=34, model=0): 34, ProcessCoord(pipe=0, data=35, model=0): 35, ProcessCoord(pipe=0, data=36, model=0): 36, ProcessCoord(pipe=0, data=37, model=0): 37, ProcessCoord(pipe=0, data=38, model=0): 38, ProcessCoord(pipe=0, data=39, model=0): 39, ProcessCoord(pipe=0, data=40, model=0): 40, ProcessCoord(pipe=0, data=41, model=0): 41, ProcessCoord(pipe=0, data=42, model=0): 42, ProcessCoord(pipe=0, data=43, model=0): 43, ProcessCoord(pipe=0, data=44, model=0): 44, ProcessCoord(pipe=0, data=45, model=0): 45, ProcessCoord(pipe=0, data=4 +0: 6, model=0): 46, ProcessCoord(pipe=0, data=47, model=0): 47, ProcessCoord(pipe=0, data=48, model=0): 48, ProcessCoord(pipe=0, data=49, model=0): 49, ProcessCoord(pipe=0, data=50, model=0): 50, ProcessCoord(pipe=0, data=51, model=0): 51, ProcessCoord(pipe=0, data=52, model=0): 52, ProcessCoord(pipe=0, data=53, model=0): 53, ProcessCoord(pipe=0, data=54, model=0): 54, ProcessCoord(pipe=0, data=55, model=0): 55, ProcessCoord(pipe=0, data=56, model=0): 56, ProcessCoord(pipe=0, data=57, model=0): 57, ProcessCoord(pipe=0, data=58, model=0): 58, ProcessCoord(pipe=0, data=59, model=0): 59, ProcessCoord(pipe=0, data=60, model=0): 60, ProcessCoord(pipe=0, data=61, model=0): 61, ProcessCoord(pipe=0, data=62, model=0): 62, ProcessCoord(pipe=0, data=63, model=0): 63} +0: [2023-02-05 01:59:10,673] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer +0: stage=0 layers=17 +0: 0: _to_float16 +0: 1: EmbeddingPipe +0: 2: +0: 3: ParallelTransformerLayerPipe +0: 4: ParallelTransformerLayerPipe +0: 5: ParallelTransformerLayerPipe +0: 6: ParallelTransformerLayerPipe +0: 7: ParallelTransformerLayerPipe +0: 8: ParallelTransformerLayerPipe +0: 9: ParallelTransformerLayerPipe +0: 10: ParallelTransformerLayerPipe +0: 11: ParallelTransformerLayerPipe +0: 12: ParallelTransformerLayerPipe +0: 13: undo +0: 14: MixedFusedLayerNorm +0: 15: EmbeddingPipe +0: 16: float16_to_fp32 +0: loss: CrossEntropy +0: [2023-02-05 01:59:10,903] [INFO] [utils.py:827:see_memory_usage] After Building Model +0: [2023-02-05 01:59:10,903] [INFO] [utils.py:828:see_memory_usage] MA 0.16 GB Max_MA 0.16 GB CA 0.17 GB Max_CA 0 GB +0: [2023-02-05 01:59:10,904] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 63.68 GB, percent = 12.7% +0: setting training iterations to 0 +0: > learning rate decay style: cosine +0: DeepSpeed is enabled. +0: [2023-02-05 01:59:10,905] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown +0: [2023-02-05 01:59:22,634] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +0: [2023-02-05 01:59:22,635] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer +0: [2023-02-05 01:59:22,635] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer +0: [2023-02-05 01:59:22,637] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +0: [2023-02-05 01:59:22,637] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer +0: [2023-02-05 01:59:22,747] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer +0: [2023-02-05 01:59:22,747] [INFO] [utils.py:828:see_memory_usage] MA 0.16 GB Max_MA 0.16 GB CA 0.17 GB Max_CA 0 GB +0: [2023-02-05 01:59:22,748] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 64.36 GB, percent = 12.8% +0: ninja: no work to do. +0: Time to load utils op: 0.21356892585754395 seconds +0: Time to load utils op: 0.10244131088256836 seconds +0: [2023-02-05 01:59:22,976] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 +0: [2023-02-05 01:59:22,977] [INFO] [utils.py:828:see_memory_usage] MA 0.16 GB Max_MA 0.16 GB CA 0.17 GB Max_CA 0 GB +0: [2023-02-05 01:59:22,977] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 64.37 GB, percent = 12.8% +0: Time to load utils op: 0.0006566047668457031 seconds +0: ninja: no work to do. +0: Time to load utils op: 0.144517183303833 seconds +0: Time to load utils op: 0.0006649494171142578 seconds +0: [2023-02-05 01:59:23,105] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 +0: Time to load utils op: 0.20283126831054688 secondsTime to load utils op: 0.2031090259552002 seconds +0: +0: Time to load utils op: 0.20303630828857422 seconds +0: [2023-02-05 01:59:23,105] [INFO] [utils.py:828:see_memory_usage] MA 0.37 GB Max_MA 0.37 GB CA 0.48 GB Max_CA 0 GB +0: Time to load utils op: 0.20197606086730957 seconds +0: [2023-02-05 01:59:23,106] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 64.41 GB, percent = 12.8% +0: Time to load utils op: 0.20193934440612793 seconds +0: Time to load utils op: 0.0003409385681152344 seconds +3: Time to load utils op: 0.21107149124145508 seconds +3: Time to load utils op: 0.21108102798461914 seconds +3: Time to load utils op: 0.21110844612121582 seconds +3: Time to load utils op: 0.21110773086547852 seconds +0: Time to load utils op: 0.00044345855712890625 secondsTime to load utils op: 0.0004069805145263672 seconds +0: +3: Time to load utils op: 0.21111416816711426 seconds +0: Time to load utils op: 0.0004000663757324219 seconds +3: Time to load utils op: 0.21112561225891113 seconds +3: Time to load utils op: 0.2111189365386963 seconds +3: Time to load utils op: 0.2111678123474121 seconds +0: Time to load utils op: 0.0003898143768310547 seconds +4: Time to load utils op: 0.2135756015777588 secondsTime to load utils op: 0.2135617733001709 secondsTime to load utils op: 0.2136232852935791 seconds +4: +4: +4: Time to load utils op: 0.2136094570159912 seconds +4: Time to load utils op: 0.2136216163635254 secondsTime to load utils op: 0.21364378929138184 seconds +4: +4: Time to load utils op: 0.21360373497009277 seconds +4: Time to load utils op: 0.21402907371520996 seconds +2: Time to load utils op: 0.21251249313354492 seconds +2: Time to load utils op: 0.21252775192260742 seconds +2: Time to load utils op: 0.21254825592041016 seconds +2: Time to load utils op: 0.21256613731384277 seconds +2: Time to load utils op: 0.21259593963623047 secondsTime to load utils op: 0.2125718593597412 seconds +2: +2: Time to load utils op: 0.21257448196411133 seconds +2: Time to load utils op: 0.21259474754333496 seconds +1: Time to load utils op: 0.21318626403808594 seconds +1: Time to load utils op: 0.2131950855255127 seconds +1: Time to load utils op: 0.21323037147521973 seconds +1: Time to load utils op: 0.21321964263916016 secondsTime to load utils op: 0.21322321891784668 seconds +1: +1: Time to load utils op: 0.21323251724243164 seconds +1: Time to load utils op: 0.21323347091674805 seconds +1: Time to load utils op: 0.21323347091674805 seconds +6: Time to load utils op: 0.21230721473693848 secondsTime to load utils op: 0.21230816841125488 seconds +6: +6: Time to load utils op: 0.21226835250854492 seconds +6: Time to load utils op: 0.21234631538391113 secondsTime to load utils op: 0.21235156059265137 seconds +6: +6: Time to load utils op: 0.21234798431396484 secondsTime to load utils op: 0.21236276626586914 seconds +6: +6: Time to load utils op: 0.21236157417297363 seconds +5: Time to load utils op: 0.21176791191101074 seconds +5: Time to load utils op: 0.2118086814880371 seconds +5: Time to load utils op: 0.21179866790771484 seconds +5: Time to load utils op: 0.21180295944213867 seconds +5: Time to load utils op: 0.21180295944213867 seconds +5: Time to load utils op: 0.21181988716125488 seconds +5: Time to load utils op: 0.211822509765625 seconds +5: Time to load utils op: 0.21181607246398926 seconds +7: Time to load utils op: 0.21203994750976562 secondsTime to load utils op: 0.2120509147644043 seconds +7: +7: Time to load utils op: 0.2120208740234375 seconds +7: Time to load utils op: 0.21206307411193848 seconds +7: Time to load utils op: 0.21204280853271484 seconds +7: Time to load utils op: 0.21207427978515625 seconds +7: Time to load utils op: 0.21208786964416504 secondsTime to load utils op: 0.21209383010864258 seconds +7: +4: Time to load utils op: 0.0008296966552734375 seconds +4: Time to load utils op: 0.0010933876037597656 seconds +4: Time to load utils op: 0.001096963882446289 seconds +4: Time to load utils op: 0.001140594482421875 seconds +4: Time to load utils op: 0.0012555122375488281 seconds +4: Time to load utils op: 0.0011527538299560547 seconds +4: Time to load utils op: 0.0012135505676269531 secondsTime to load utils op: 0.0012898445129394531 seconds +4: +7: Time to load utils op: 0.0007259845733642578 seconds +6: Time to load utils op: 0.0007791519165039062 seconds +6: Time to load utils op: 0.0008947849273681641 seconds +6: Time to load utils op: 0.0007708072662353516 seconds +6: Time to load utils op: 0.0009148120880126953 seconds +7: Time to load utils op: 0.0010428428649902344 seconds +6: Time to load utils op: 0.0010688304901123047 secondsTime to load utils op: 0.0008685588836669922 seconds +6: +7: Time to load utils op: 0.0010488033294677734 seconds +6: Time to load utils op: 0.0009434223175048828 seconds +7: Time to load utils op: 0.0010907649993896484 seconds +7: Time to load utils op: 0.0010790824890136719 seconds +6: Time to load utils op: 0.001216888427734375 seconds +3: Time to load utils op: 0.0011167526245117188 seconds +3: Time to load utils op: 0.000885009765625 secondsTime to load utils op: 0.0010395050048828125 seconds +3: +7: Time to load utils op: 0.0011396408081054688 seconds +7: Time to load utils op: 0.0011572837829589844 seconds +5: Time to load utils op: 0.0008459091186523438 seconds +7: Time to load utils op: 0.0011119842529296875 seconds +2: Time to load utils op: 0.0012309551239013672 seconds +3: Time to load utils op: 0.001331329345703125 seconds +3: Time to load utils op: 0.0014264583587646484 seconds +3: Time to load utils op: 0.0013554096221923828 seconds +3: Time to load utils op: 0.0012898445129394531 seconds +3: Time to load utils op: 0.0013213157653808594 seconds +2: Time to load utils op: 0.0015044212341308594 seconds +5: Time to load utils op: 0.0011487007141113281 seconds +2: Time to load utils op: 0.0015642642974853516 seconds +2: Time to load utils op: 0.0015468597412109375 secondsTime to load utils op: 0.0015153884887695312 seconds +2: +2: Time to load utils op: 0.0016164779663085938 seconds +2: Time to load utils op: 0.0015192031860351562 seconds +2: Time to load utils op: 0.0015044212341308594 seconds +5: Time to load utils op: 0.0012946128845214844 seconds +5: Time to load utils op: 0.0013935565948486328 secondsTime to load utils op: 0.0015249252319335938 secondsTime to load utils op: 0.0015189647674560547 seconds +5: +5: +5: Time to load utils op: 0.001550436019897461 seconds +5: Time to load utils op: 0.0015306472778320312 seconds +1: Time to load utils op: 0.001054525375366211 seconds +1: Time to load utils op: 0.0013282299041748047 seconds +1: Time to load utils op: 0.0014200210571289062 seconds +1: Time to load utils op: 0.0013773441314697266 seconds +1: Time to load utils op: 0.0013768672943115234 seconds +1: Time to load utils op: 0.0014524459838867188 secondsTime to load utils op: 0.0013697147369384766 seconds +1: +1: Time to load utils op: 0.0014486312866210938 seconds +0: [2023-02-05 01:59:23,223] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 +0: [2023-02-05 01:59:23,223] [INFO] [utils.py:828:see_memory_usage] MA 0.37 GB Max_MA 0.37 GB CA 0.48 GB Max_CA 0 GB +0: [2023-02-05 01:59:23,224] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 64.51 GB, percent = 12.8% +0: [2023-02-05 01:59:23,325] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 +0: [2023-02-05 01:59:23,326] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-05 01:59:23,326] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 64.51 GB, percent = 12.8% +0: [2023-02-05 01:59:23,422] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 +0: [2023-02-05 01:59:23,423] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-05 01:59:23,423] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 64.51 GB, percent = 12.8% +0: [2023-02-05 01:59:23,519] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 +0: [2023-02-05 01:59:23,519] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-05 01:59:23,519] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 64.51 GB, percent = 12.8% +0: [2023-02-05 01:59:23,613] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer +0: [2023-02-05 01:59:23,613] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-05 01:59:23,613] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 64.51 GB, percent = 12.8% +0: [2023-02-05 01:59:23,712] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer +0: [2023-02-05 01:59:23,712] [INFO] [utils.py:828:see_memory_usage] MA 0.48 GB Max_MA 0.48 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-05 01:59:23,713] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 64.51 GB, percent = 12.8% +0: [2023-02-05 01:59:23,806] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer +0: [2023-02-05 01:59:23,807] [INFO] [utils.py:828:see_memory_usage] MA 0.48 GB Max_MA 0.48 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-05 01:59:23,807] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 64.51 GB, percent = 12.8% +0: [2023-02-05 01:59:23,807] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +0: [2023-02-05 01:59:23,807] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler +0: [2023-02-05 01:59:23,807] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = +0: [2023-02-05 01:59:23,807] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0002, 0.0002, 0.0002], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +0: [2023-02-05 01:59:23,807] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] activation_checkpointing_config { +0: "partition_activations": false, +0: "contiguous_memory_optimization": false, +0: "cpu_checkpointing": false, +0: "number_checkpoints": null, +0: "synchronize_checkpoint_boundary": false, +0: "profile": false +0: } +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] amp_enabled .................. False +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] amp_params ................... False +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] autotuning_config ............ { +0: "enabled": false, +0: "start_step": null, +0: "end_step": null, +0: "metric_path": null, +0: "arg_mappings": null, +0: "metric": "throughput", +0: "model_info": null, +0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", +0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", +0: "overwrite": true, +0: "fast": true, +0: "start_profile_step": 3, +0: "end_profile_step": 5, +0: "tuner_type": "gridsearch", +0: "tuner_early_stopping": 5, +0: "tuner_num_trials": 50, +0: "model_info_path": null, +0: "mp_size": 1, +0: "max_train_batch_size": null, +0: "min_train_batch_size": 1, +0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, +0: "min_train_micro_batch_size_per_gpu": 1, +0: "num_tuning_micro_batch_sizes": 3 +0: } +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] bfloat16_enabled ............. True +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] comms_config ................. +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] communication_data_type ...... None +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa +0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] curriculum_enabled ........... False +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] curriculum_params ............ False +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] dataloader_drop_last ......... False +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] disable_allgather ............ False +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] dump_state ................... False +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False +0: [2023-02-05 01:59:23,808] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ bert.encoder.layer +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] elasticity_enabled ........... False +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] flops_profiler_config ........ { +0: "enabled": false, +0: "profile_step": 1, +0: "module_depth": -1, +0: "top_modules": 1, +0: "detailed": true, +0: "output_file": null +0: } +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] fp16_auto_cast ............... None +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] fp16_enabled ................. False +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] global_rank .................. 0 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 1 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] load_universal_checkpoint .... False +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] loss_scale ................... 1.0 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] memory_breakdown ............. False +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] monitor_config ............... +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] nebula_config ................ { +0: "enabled": false, +0: "persistent_storage_path": null, +0: "persistent_time_interval": 100, +0: "num_of_version_in_retention": 2, +0: "enable_nebula_load": true, +0: "load_path": null +0: } +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] optimizer_name ............... None +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] optimizer_params ............. None +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] pld_enabled .................. False +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] pld_params ................... False +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] prescale_gradients ........... False +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] scheduler_name ............... None +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] scheduler_params ............. None +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] sparse_attention ............. None +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] steps_per_print .............. 2000 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] train_batch_size ............. 256 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 4 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] use_node_local_storage ....... False +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] wall_clock_breakdown ......... False +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] world_size ................... 64 +0: [2023-02-05 01:59:23,809] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False +0: [2023-02-05 01:59:23,810] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False +0: [2023-02-05 01:59:23,810] [INFO] [config.py:1011:print] zero_enabled ................. False +0: [2023-02-05 01:59:23,810] [INFO] [config.py:1011:print] zero_optimization_stage ...... 0 +0: [2023-02-05 01:59:23,810] [INFO] [config.py:996:print_user_config] json = { +0: "train_micro_batch_size_per_gpu": 4, +0: "train_batch_size": 256, +0: "gradient_clipping": 1.0, +0: "zero_optimization": { +0: "stage": 0 +0: }, +0: "bf16": { +0: "enabled": true +0: }, +0: "steps_per_print": 2.000000e+03, +0: "wall_clock_breakdown": false +0: } +0: Time to load utils op: 0.00043320655822753906 seconds +0: [2023-02-05 01:59:23,810] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=1 micro_batch_size=4 +0: [2023-02-05 01:59:23,867] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=17 [0, 17) STAGE_PARAMS=82741760 (82.742M) TOTAL_PARAMS=82741760 (82.742M) UNIQUE_PARAMS=82741760 (82.742M) +6: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:23,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:23,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:23,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:24,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:24,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:24,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:24,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:24,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:24,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:24,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:24,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:24,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:24,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:24,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:24,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:24,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:24,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:24,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:24,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:24,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:24,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:24,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:24,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:24,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:24,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:24,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:24,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:24,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:24,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:24,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:24,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:24,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:24,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:24,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:24,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:24,180] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,180] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,180] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,180] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,180] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:24,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:24,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:24,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:24,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:24,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:24,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:24,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:24,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:24,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:24,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:24,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:24,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:24,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:24,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:24,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:24,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:24,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:24,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:24,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:24,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:24,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:24,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:24,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:24,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:24,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:24,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:24,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:24,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:24,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:24,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:24,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:24,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:24,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:24,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:24,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:24,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:24,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:24,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:24,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:24,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:24,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:24,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:24,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:24,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:24,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:24,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:24,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:24,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:24,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:24,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:24,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:24,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:24,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:24,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:24,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:24,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:24,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:24,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:24,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:24,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:24,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:24,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:24,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:24,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:24,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:24,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:24,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:24,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:24,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:24,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:24,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:24,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:24,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:24,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:24,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:24,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:24,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:24,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:24,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:24,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:24,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:24,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:24,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,275] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:24,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:24,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:24,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:24,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:24,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:24,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:24,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:24,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:24,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:24,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:24,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:24,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:24,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:24,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:24,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:24,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:24,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:24,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:24,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:24,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:24,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:24,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:24,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:24,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:24,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:24,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:24,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:24,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:24,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:24,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:24,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:24,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:24,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:24,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:24,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:24,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:24,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:24,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:24,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:24,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:24,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:24,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:24,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:24,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:24,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:24,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:24,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:24,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:24,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:24,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:24,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:24,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:24,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:24,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:24,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:24,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:24,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:24,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:24,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:24,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:24,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:24,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:24,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,432] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:24,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:24,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:24,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:24,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:24,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:24,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:24,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:24,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:24,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:24,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:24,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:24,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:24,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:24,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:24,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:24,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:24,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:24,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:24,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:24,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:24,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:24,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:24,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:24,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:24,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:24,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:24,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:24,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:24,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:24,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:24,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:24,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:24,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:24,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:24,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:24,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:24,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:24,596] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,596] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,596] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,597] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,597] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,598] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,597] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,598] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,598] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,598] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,598] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,598] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:24,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:24,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:24,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:24,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:24,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:24,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:24,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:24,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:24,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:24,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:24,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:24,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:24,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:24,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:24,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:24,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:24,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:24,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:24,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:24,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:24,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:24,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:24,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:24,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:24,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:24,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:24,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:24,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:24,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:24,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:24,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:24,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:24,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:24,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:24,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:24,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:24,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:24,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:24,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:24,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:24,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:24,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:24,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:24,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:24,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:24,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:24,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:24,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:24,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:24,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:24,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:24,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:24,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:24,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:24,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:24,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:24,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:24,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:24,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:24,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:24,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:24,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:24,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:24,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:24,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:24,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:24,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:24,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:24,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:24,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:24,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:24,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:24,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:24,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:24,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:24,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:24,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:24,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:24,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:24,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:24,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:24,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:24,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:24,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:24,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:24,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:24,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:24,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:24,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:24,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:24,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:24,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:24,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:24,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:24,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:24,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:24,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:24,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:24,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:24,862] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:24,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:24,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:24,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:24,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:24,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,866] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:24,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,866] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,866] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:24,868] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:24,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,868] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:24,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:24,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:24,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:24,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:24,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:24,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:24,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:24,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:24,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:24,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:25,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:25,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:25,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:25,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:25,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:25,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:25,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:25,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:25,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:25,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:25,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: > overriding learning rate value to 0.0002 +3: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: > overriding minimum learning rate value to 2e-05 +1: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: > overriding warmup iterations value to 0 +3: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: > overriding total number of iterations value to 1 +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: > overriding decay style value to cosine +1: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:25,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:25,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:25,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:25,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:25,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:25,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:25,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:25,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:25,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:25,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:25,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:25,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:25,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:25,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:25,082] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 43 +5: [2023-02-05 01:59:25,083] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 43 +7: [2023-02-05 01:59:25,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:25,087] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 58 +7: [2023-02-05 01:59:25,088] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 58 +5: [2023-02-05 01:59:25,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:25,092] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 40 +1: [2023-02-05 01:59:25,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:25,093] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 9 +5: [2023-02-05 01:59:25,094] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 40 +3: [2023-02-05 01:59:25,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:25,094] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 28 +6: [2023-02-05 01:59:25,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-02-05 01:59:25,094] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 55 +1: [2023-02-05 01:59:25,095] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 9 +6: [2023-02-05 01:59:25,095] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 55 +3: [2023-02-05 01:59:25,096] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 28 +0: [2023-02-05 01:59:25,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:25,100] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 5 +4: [2023-02-05 01:59:25,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:25,100] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 38 +0: [2023-02-05 01:59:25,101] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 5 +1: [2023-02-05 01:59:25,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:25,101] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 13 +4: [2023-02-05 01:59:25,102] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 38 +1: [2023-02-05 01:59:25,103] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 13 +2: [2023-02-05 01:59:25,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:25,103] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 23 +1: [2023-02-05 01:59:25,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:25,104] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 14 +2: [2023-02-05 01:59:25,105] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 23 +1: [2023-02-05 01:59:25,106] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 14 +3: [2023-02-05 01:59:25,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:25,107] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 30 +3: [2023-02-05 01:59:25,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:25,109] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 25 +3: [2023-02-05 01:59:25,109] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 30 +6: [2023-02-05 01:59:25,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-02-05 01:59:25,109] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 53 +0: [2023-02-05 01:59:25,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:25,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:25,110] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 6 +3: [2023-02-05 01:59:25,110] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 29 +3: [2023-02-05 01:59:25,110] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 25 +6: [2023-02-05 01:59:25,111] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 53 +5: [2023-02-05 01:59:25,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:25,111] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 44 +3: [2023-02-05 01:59:25,111] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 29 +4: [2023-02-05 01:59:25,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:25,112] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 32 +0: [2023-02-05 01:59:25,112] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 6 +3: [2023-02-05 01:59:25,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:25,112] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 26 +5: [2023-02-05 01:59:25,113] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 44 +4: [2023-02-05 01:59:25,113] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 32 +6: [2023-02-05 01:59:25,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:25,113] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 26 +6: [2023-02-05 01:59:25,113] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 50 +6: [2023-02-05 01:59:25,114] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 50 +0: [2023-02-05 01:59:25,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:25,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:25,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:25,115] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 4 +0: [2023-02-05 01:59:25,115] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 7 +0: [2023-02-05 01:59:25,115] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 2 +6: [2023-02-05 01:59:25,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-02-05 01:59:25,115] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 49 +4: [2023-02-05 01:59:25,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:25,116] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 34 +0: [2023-02-05 01:59:25,116] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 2 +0: [2023-02-05 01:59:25,116] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 7 +0: [2023-02-05 01:59:25,116] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 4 +7: [2023-02-05 01:59:25,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:25,116] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 59 +6: [2023-02-05 01:59:25,116] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 49 +7: [2023-02-05 01:59:25,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:25,117] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 61 +4: [2023-02-05 01:59:25,117] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 34 +2: [2023-02-05 01:59:25,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:25,118] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 18 +7: [2023-02-05 01:59:25,118] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 59 +4: [2023-02-05 01:59:25,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:25,118] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 35 +7: [2023-02-05 01:59:25,119] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 61 +2: [2023-02-05 01:59:25,119] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 18 +4: [2023-02-05 01:59:25,120] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 35 +5: [2023-02-05 01:59:25,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:25,121] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 47 +4: [2023-02-05 01:59:25,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:25,122] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 33 +4: [2023-02-05 01:59:25,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:25,123] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 47 +7: [2023-02-05 01:59:25,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:25,123] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 39 +7: [2023-02-05 01:59:25,123] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 63 +4: [2023-02-05 01:59:25,124] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 33 +7: [2023-02-05 01:59:25,124] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 63 +4: [2023-02-05 01:59:25,124] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 39 +5: [2023-02-05 01:59:25,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:25,125] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 42 +2: [2023-02-05 01:59:25,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:25,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:25,126] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 17 +4: [2023-02-05 01:59:25,126] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 37 +5: [2023-02-05 01:59:25,127] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 42 +4: [2023-02-05 01:59:25,127] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 37 +2: [2023-02-05 01:59:25,127] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 17 +1: [2023-02-05 01:59:25,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:25,128] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 10 +2: [2023-02-05 01:59:25,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:25,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:25,129] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 21 +2: [2023-02-05 01:59:25,129] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 16 +1: [2023-02-05 01:59:25,129] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 10 +2: [2023-02-05 01:59:25,130] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 21 +2: [2023-02-05 01:59:25,130] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 16 +4: [2023-02-05 01:59:25,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:25,133] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 36 +6: [2023-02-05 01:59:25,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-02-05 01:59:25,133] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 54 +4: [2023-02-05 01:59:25,135] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 36 +6: [2023-02-05 01:59:25,135] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 54 +6: [2023-02-05 01:59:25,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-02-05 01:59:25,136] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 51 +2: [2023-02-05 01:59:25,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:25,136] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 22 +6: [2023-02-05 01:59:25,137] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 51 +6: [2023-02-05 01:59:25,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-02-05 01:59:25,138] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 48 +2: [2023-02-05 01:59:25,138] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 22 +6: [2023-02-05 01:59:25,139] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 48 +3: [2023-02-05 01:59:25,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:25,139] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 27 +2: [2023-02-05 01:59:25,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:25,140] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 20 +3: [2023-02-05 01:59:25,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:25,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:25,140] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 31 +7: [2023-02-05 01:59:25,140] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 56 +5: [2023-02-05 01:59:25,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:25,141] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 46 +3: [2023-02-05 01:59:25,141] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 27 +2: [2023-02-05 01:59:25,141] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 20 +1: [2023-02-05 01:59:25,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:25,141] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 15 +7: [2023-02-05 01:59:25,142] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 56 +3: [2023-02-05 01:59:25,142] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 31 +5: [2023-02-05 01:59:25,142] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 46 +1: [2023-02-05 01:59:25,143] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 15 +0: [2023-02-05 01:59:25,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:25,145] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 0 +0: [2023-02-05 01:59:25,146] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 0 +0: could not find arguments in the checkpoint ... +0: checkpoint version 3.0 +5: [2023-02-05 01:59:25,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:25,146] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 45 +5: [2023-02-05 01:59:25,147] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 45 +1: [2023-02-05 01:59:25,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:25,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:25,149] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 11 +7: [2023-02-05 01:59:25,149] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 62 +7: [2023-02-05 01:59:25,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:25,149] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 57 +1: [2023-02-05 01:59:25,150] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 11 +7: [2023-02-05 01:59:25,150] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 62 +7: [2023-02-05 01:59:25,150] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 57 +3: [2023-02-05 01:59:25,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:25,152] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 24 +3: [2023-02-05 01:59:25,153] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 24 +0: [2023-02-05 01:59:25,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:25,158] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 1 +0: [2023-02-05 01:59:25,160] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 1 +7: [2023-02-05 01:59:25,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:25,167] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 60 +7: [2023-02-05 01:59:25,169] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 60 +2: [2023-02-05 01:59:25,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:25,172] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 19 +2: [2023-02-05 01:59:25,173] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 19 +6: [2023-02-05 01:59:25,180] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-02-05 01:59:25,180] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 52 +6: [2023-02-05 01:59:25,181] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 52 +1: [2023-02-05 01:59:25,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:25,186] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 12 +1: [2023-02-05 01:59:25,187] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 12 +5: [2023-02-05 01:59:25,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:25,199] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 41 +5: [2023-02-05 01:59:25,200] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 41 +1: [2023-02-05 01:59:25,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:25,201] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 8 +1: [2023-02-05 01:59:25,203] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 8 +0: [2023-02-05 01:59:25,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:25,221] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 3 +0: [2023-02-05 01:59:25,223] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 3 +0: successfully loaded checkpoint from checkpoints_83m20b1b5 at iteration 0 +7: time (ms) | load-checkpoint: 1358.33 +0: estimated model parameters: 0.08274176 +0: estimated model parameters without embeddings: 0.04923648 +0: [after model, optimizer, and learning rate scheduler are built] datetime: 2023-02-05 01:59:25 +0: > building train, validation, and test datasets ... +0: > datasets target sizes (minimum size): +0: train: 1 +0: validation: 25600 +0: test: 25600 +0: > building train, validation, and test datasets for GPT ... +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.015317 seconds +0: number of documents: 41786294 +0: > dataset split: +0: train: +0: document indices in [0, 41786294) total of 41786294 documents +0: > WARNING: could not find index map files, building the indices on rank 0 ... +0: > only one epoch required, setting separate_last_epoch to False +0: > elasped time to build and save doc-idx mapping (seconds): 3.015537 +0: using: +0: number of documents: 41786294 +0: number of epochs: 1 +0: sequence length: 2048 +0: total number of samples: 9767462 +0: > elasped time to build and save sample-idx mapping (seconds): 0.491719 +0: > building shuffle index with split [0, 9767462) and [9767462, 9767462) ... +0: > elasped time to build and save shuffle-idx mapping (seconds): 0.259357 +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document_train_indexmap_1ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document_train_indexmap_1ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document_train_indexmap_1ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.047 seconds +0: total number of samples: 9767463 +0: total number of epochs: 1 +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.066532 seconds +0: number of documents: 364608 +0: > dataset split: +0: validation: +0: document indices in [0, 364608) total of 364608 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.070 seconds +0: total number of samples: 84978 +0: total number of epochs: 1 +0: > finished creating GPT datasets ... +0: [after dataloaders are built] datetime: 2023-02-05 01:59:44 +0: done with setup ... +0: training ... +7: time (ms) | model-and-optimizer-setup: 17063.94 | train/valid/test-data-iterators-setup: 18442.67 +0: [after training is done] datetime: 2023-02-05 01:59:44 +0: [2023-02-05 01:59:45,634] [INFO] [checkpointing.py:553:forward] Activation Checkpointing Information +0: [2023-02-05 01:59:45,634] [INFO] [checkpointing.py:554:forward] ----Partition Activations False, CPU CHECKPOINTING False +0: [2023-02-05 01:59:45,634] [INFO] [checkpointing.py:557:forward] ----contiguous Memory Checkpointing False with None total layers +0: [2023-02-05 01:59:45,634] [INFO] [checkpointing.py:560:forward] ----Synchronization False +0: [2023-02-05 01:59:45,634] [INFO] [checkpointing.py:561:forward] ----Profiling time in checkpointing False +7: ----------------------------------------------------------------------------------------------------------------- +7: validation loss at the end of training for val data | lm loss value: 3.618937E+00 | lm loss PPL: 3.729791E+01 | +7: ----------------------------------------------------------------------------------------------------------------- +END 2809866: Sun Feb 5 02:00:10 EET 2023 diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_0.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..8f9e7760a7dfe39ede7b7d9ce0a9e1955a708a83 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.22143394507810327, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02465792463448575}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.057248981329799054, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001192612421597827}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.24543061097061072, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003908026836015179}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08800136491523618, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016491169576186293}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.026714899334789086, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007442762398954055}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1178094552104916, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0027477564434458214}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.041199412710563856, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010517042389501993}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.055526615857988376, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001130839530057266}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.24063012834551578, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003857572393248235}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08560059630016492, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015823553571084356}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.054979572657230524, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011215172613670027}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.23729393562344675, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003728197228303874}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08463176069741568, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001556056466058389}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_1.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..ea2b3b4caa7b2c4213d38bda6859b4fa63cead58 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.1624501575699319, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02485001075950609}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.04976906607582228, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001223285628673813}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.22912868657496024, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004000208428173092}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07652804993972759, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015509513452004412}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.021280843722581117, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006558396664738548}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.09698896856082298, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0026302797431824575}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.033033827764874776, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009553089639751551}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.048710096616471736, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011961828433359935}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.22437608864144468, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003906105502350576}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.0749498233935733, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001512700175941009}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04804749736676356, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011967224986575548}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.219947744667869, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0038099663633966267}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.0738050333776553, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015085683805012107}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_2.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..eb8f791483838aa780cf0f83a3b16799be7828e0 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.1339593574453344, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.011475928272299842}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.049361448132339274, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001239016812669748}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.21230159325238288, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003778690654328066}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07387997790765682, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001583264492597526}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.01991718794966518, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006694648433471455}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.08574459105462398, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002452796936423602}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.030311839537456788, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009405456802500011}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.04761539861338694, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011459311415354199}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.20795605555159263, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0037279876892756243}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07186428635373836, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015257825086432422}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.047200993059258047, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011619201330870998}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.20380441991273304, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0036356400663095085}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07095585503189342, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015280634049346952}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_3.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..073e0e43a6a2b99dc384be8d7474ea288f1f0fa1 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.1445093157682755, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.011680141740973857}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05381940117828338, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014712457375755592}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.209465867149318, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003670571567082913}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07753925599964062, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016513114634372}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.02129680401204643, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007905560723139065}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.08624541141502207, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0025045432057736777}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03145858340503504, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009713899445453562}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.051085987724314566, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013473173376081658}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2034637506343861, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003594440263896121}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07430631587097808, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015425867113632366}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.05151060744188861, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00139386344479781}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.20131756184017846, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003542645648092287}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07438757103390763, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015672563362475736}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_4.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b83b27de5c70deb1bad43ba076d9c5b0ef3fadf9 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.15397676959746612, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.020319972800181465}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05495084768413863, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015206432899508664}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.20901533932464228, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003765937704948867}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07895932583644215, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016974724661250095}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.021305782148674943, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007895791251539496}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.08650908135775882, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002511055892649366}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03177157501076817, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001014427068722938}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.05192806645015237, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013573191512714553}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.20293110085708246, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0036649332304372496}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07544181272417243, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015595589465977734}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.05227045530331481, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014172788365244952}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.20101071027811684, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0036165987277269767}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.0753990370818791, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015858190086455858}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_5.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..d9fb7c01ffede121e324a6b9d9f30f0abc614727 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.12365669611693078, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.016819858728710592}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.058162947754682705, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001603455708712496}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.20644138216593572, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0037060630938948063}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08126967062478106, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016893296748050316}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.021276187110273764, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007384305971847312}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.08615141744067409, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00244568873592459}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03141214421193687, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009319810101283329}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.054616367607262496, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014155344507793685}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.20085438051037754, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0036562516852884095}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07745748771292646, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015582261760097003}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.055143757729757215, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014690987760715066}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.19951563840190698, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003620210297773779}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07768139042647253, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015879259334824365}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_0.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..0e7da64b7188e651d7862c4ab52af32403132650 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.08834158708210425, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015422494801128619}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.1511240455748458, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0023614082923857573}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.10294268147642652, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015885949612612785}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.011969635451153749, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005014249403571861}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.022088097449328107, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009513551465069334}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.014076128249375142, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005452085443492553}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.07677846062587532, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001243062818101287}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1339169734430534, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020383966006715017}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.0900445479921608, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012930646444676347}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.08184673815697002, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014145060460416376}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.14081489800530161, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0021974572973938104}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.0955398760776783, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014590638597025002}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.6881722062318003, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03492651341120409}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_1.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..eb14a10867f82f3609032c7cf9dac0230dc7b029 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.09635338878713894, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001429634043206871}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.15055746081916005, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0020409816175998956}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1079792012109596, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014252549449545956}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.00790430405049511, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0003845584524229758}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.013584133206075426, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0007558234415691987}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.009158861118954552, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000440640430879755}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.0746787940850166, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0009973389376698493}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.11936143354392323, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0015295311911311558}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.08402340037468532, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0009726524560201465}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.09102641527702295, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013304985489080228}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.14265063103150583, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0018998082887351517}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.10206113782881272, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013176698304838254}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.5189014506992261, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.037670534196124573}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_2.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..b6879fbde492f5e5eea26df9bbc82abe4c90729c --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.10029653295090936, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014946374809932353}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.14402180107303292, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0020779782644098654}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.10582795070072855, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0013794712845124977}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.008057008165996204, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00040900296745959225}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.013786749016989792, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.000854235224150827}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.008854865373978756, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00042957921880979654}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.07960000286602568, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011035510489881863}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.11668978653923719, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001658242514047369}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.08413411350451327, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0009822040615333329}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.09489301023067956, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014010349563147654}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.13620770042084057, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.001949192702183326}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.09994328210117027, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0012748001140968398}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.5364516905010515, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.030040870725624152}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_3.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..ebbd33f7a51596e64f3ce531d6e05e923964b2b6 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.09733870322770075, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001964614418018535}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.1265177788367841, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002283196052782641}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.095155817472981, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015726135414282833}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.010579030486092352, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006361010745718742}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.01457049887287368, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0008743705196037304}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.009992333874941724, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0004894216568424037}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.07848811204409353, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015634596721503732}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.10375992972655007, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0018585072141463139}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.07675731421452539, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0011996056298117226}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.09111564171943451, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018284817988930863}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.11839833718232995, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002115864841628711}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.08901182636481536, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014509521046499364}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.7125414859944954, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.041080704322432306}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_4.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..083cf9dce2561e7f8e714e8f3368bf6ea994f7c0 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.03402776805186772, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015021562092442141}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.044996950843626565, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0018847203928613078}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.03289180000234644, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0013035881201820096}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.004717090436515237, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0004903852378905182}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0067399115754938, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.000626421604068629}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.004391769717086789, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000363776059433011}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.02808301838439718, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001235285408714272}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.03778261719394529, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001573162142922227}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.027070581766123904, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0010387205497611551}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.03161223415677873, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014058140100163263}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.041743224919272987, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0017391700899322268}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.03048627315330421, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0012057898408514823}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.14545889127091902, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.014044744897310252}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_5.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..d6a402ec14d0f73639b0f1732d50f4dc9fe7639f --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.006781221068179048, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0008370099388644863}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.007836550901817174, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0009051911934293397}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.005704280866490329, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0006219180243094649}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0013677198862754726, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0003346680345001492}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0016514234570176281, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00037191127816620903}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0010137961495631932, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00019029294781685254}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.005583704202117562, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0006706928723442628}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.006455316117651287, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0007271642666562989}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.004651743548906982, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00048649455410650766}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.006405915344244336, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0007905688564967488}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.007287098704531648, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0008304733265241732}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.00532823887838008, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005738794568348857}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 5.704374135069664e-09, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 1.7013212140847576e-08}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..611126a733a86a64731252f7bc41ff586affa94e --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..7226cb9f54f5b81a4ec53f5e4f72e97b9487585f --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 2.6728852439569457, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09065446424435947}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.3933396522350936, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005148320230640675}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.20775934709343202, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026260859064331757}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.23072445045321552, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002443871760192078}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.1938587834482223, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.005729810617300909}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.05837069594557587, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0012544212946420665}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.06747783829778976, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013159166329732434}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.32691601232449063, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.005208463887679732}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.1560034121824963, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019515494752455444}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.17588620890908344, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018377940107973307}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.34888738755497656, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.005205421554401587}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.17325308245248913, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00221766186426316}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.19419672329958557, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002091472191519842}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..95e98636d49f88668fcb00f007ab4aaa769776f5 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 2.6759691267622747, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05152927781934443}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.43498335940641936, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005452965227789697}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.21613264334760326, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026224518654289113}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.24046253818668323, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00234658072085576}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.23691715770918176, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.006378708979164285}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.06142971373009135, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0012573424940348463}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.07189745808513573, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012924257721161703}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.36302611905661375, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.005643736562173541}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.16024970690926627, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001911462317362814}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.18141453249435047, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017217485277798694}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3860721614508426, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.005592828702084638}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.17828624080991215, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0021858213523899077}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.20045453712563308, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001974909674126333}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..115c74334fc39119ea5feec7815a806819f9df9e --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 2.350697048132632, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.16368662036445195}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.42321061678790745, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005787468869047046}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.19856438121204104, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0027009490378613094}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.22300242392840638, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002500676214947681}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.23874008605296593, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.006530181171757951}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.056759103167357966, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0012360584167915294}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.06712596107139872, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012794760647159404}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.35806559522674963, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0059374591369078114}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.1476711204601984, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019603780846806818}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.16938082876445262, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00185869633377821}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.37882347750269213, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.005890887100641854}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.1642557288057488, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002242539346776154}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.18678713987354215, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002106687381417484}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 0000000000000000000000000000000000000000..295200b176d085c0f32f48a2740da36694065031 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 2.3383066506175627, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08147359581654341}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.4743498123819304, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.006154240614260249}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.1934023407385226, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00264806491029735}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.2230383375802249, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0024298858572107357}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.29735568863767275, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.007172641535922273}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.05883954698769347, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0012351467645497919}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.07278484255401799, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013293808485782847}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4120893239123253, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.006381648620379837}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.14689746853919977, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001923510153311473}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.17373747802751602, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018227045337711142}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4316295979128506, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.006311549546947088}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.16210256786021354, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0021994593339633196}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.18953165573765662, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00204552521574545}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..33d131230aba07d582cd01f85a8afeb33667a04d --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 2.115844671738753, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09287010536431305}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.49372599728222144, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.006303863309241805}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.1893486587365343, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026284056137824206}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.22070056495356907, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002412659551940779}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.322127699190832, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0074386563285679}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.05810478895862701, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0012167049976553911}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.07298455264569846, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012981277980822924}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.43350933132635133, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0065815685602999266}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.14446877213741532, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019188306832565957}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.17291135224561338, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018098077726075982}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.45167492487828215, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.006510688665251907}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.15821178190359975, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00216908695004573}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.18736857411665434, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020217436017845682}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_0.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..95e24f5b13bee143eb1c2972c03f81576b3cdb16 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.09617233932550984, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015968573233213028}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2360757694545423, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00370387203918051}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.13453387679982023, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002137215663359666}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.016345450182953684, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007889657019727539}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.040945287238629884, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018815137332123609}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.023009362416004837, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010848983621226595}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.08358012787593269, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013109418670182917}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.20600731487938223, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003075514228900137}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.11696204362015787, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001739862040807931}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.074261757400661, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012755865913296775}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.18350710093496767, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003027114522321686}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.10393138760084052, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016974542494408457}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.9122699611037882, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06570902741835642}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_1.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..1ea5f76850c5911639acd4ee5f5c75f530e60903 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08459268678603156, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015063922805013791}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.21062699869544751, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0035228860393077184}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.11916742202289989, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020441367601762984}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.012597930749107738, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007377841672586165}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.031999643834912925, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017919904739554316}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.017857255626480974, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010224310064310912}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.07611417379359779, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012742619154937661}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.19044265188596107, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0030309210607359196}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.10734992521262915, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001730288249458564}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06528504699855653, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001169674218531962}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.16387535729166744, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0028298209473616265}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.09212215013971556, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015951047932129575}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.6934236139159788, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.055189833618672024}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_2.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..b4d63680e33909776ce9798f00e6ced7934463bb --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08310989226757869, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014496423333643577}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2077051554329215, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0034244055888820804}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.11714486547191727, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019721020649582028}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.012193809590115543, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006986539554457616}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.031175575241520804, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017093872902993448}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.017297006731733434, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009663380265457396}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.075661760756666, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001238392153577493}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.18972091925552043, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0029483233194428997}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.10673163556701443, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001683180060909063}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06352129242042852, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011102051150740533}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.16014108387182785, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026842238281168103}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.08969158968905694, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015132067354959346}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.6570261539221618, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06457679953165572}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_3.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..db1724ad08787d6ef567207537840e789ba0a938 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08501067090237822, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017615025396123855}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.20134376204343213, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0035158650629011513}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.11594351096508414, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020790775255337675}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.013112083342324879, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007544397415908959}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.03158394076946137, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001687518797011113}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.017911502606647596, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000985564236124104}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.07777247652070679, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001507255909258555}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.18553333666910865, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003100840457412398}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.10646367058425908, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001810498716362027}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06541409584763817, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001407070467981226}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.15558136711822532, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027429242918312374}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.08910829543273951, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016024522291310029}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.7422613790495289, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06514723413231459}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_4.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..64b052d0c68d8e046a0b9a17ca84cfee1de43aa3 --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.031243353659348713, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0022406794856341195}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.052779876794259606, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0031003687375510277}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.03408594165345016, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020050874444915642}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0054699172104788465, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001017227653179516}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.00935841193125638, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0010245180706557962}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.005875450428271828, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0006460157370222755}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0275752093040082, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0019600165937700346}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.04754293835542539, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0027737773827495025}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.03028975918570269, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017390916872331722}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.024866785974796596, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018932649328032359}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.041138743222902195, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0024674237330471085}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.026508905866873923, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015700802469097192}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.45434695424591715, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10483639531272293}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_5.json b/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..f838c2ee164bb8b03659124bbb7f134ff47726fd --- /dev/null +++ b/83m20b1b5/evaluation/generation/agg.83m20b1b5_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.003204464726749907, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0009476798992127986}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.0027101036549158557, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0007645441485834611}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.002892090360931839, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0008332937221299973}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0009424152737138307, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0004633862018340062}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.0007161109606491931, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.000341349884950177}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.0008079947232119309, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0003898181285980445}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0024671672869581627, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0007402438440876393}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0020939538292462866, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.000594696151720948}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.0022266454683835834, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0006476197460812914}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.00268168331723446, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0008163559154327512}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.0022685619916896034, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0006585699169545011}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.002417399696949334, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0007164145255356163}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 3.293954829261569e-37, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 4.816123484116706e-31}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_0.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..447b9a23057eabc6437dbb53bd6a9ec2e812cddf --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b67a070637f9e311f87486f57d30446768fbaf05e4353baab1d91a061e697605 +size 4158570 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_1.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..620eb696aa8d8def63de4ac918d83fc173476c9f --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae7da7bdf2946cf01acf5d69f538ee0c4f72228e777fec7c3c11e6da5ab3bc20 +size 5158798 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_2.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b1d0eb0dd091627307b7160494a5d700bcd99ea4 --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15fc4d4602b6f1161f46e34c7d25442e787b5b5dad1c67cf7df1f5945447280 +size 6012846 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_3.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0606efb3c66e1e55f21f92baa607c69ac41f942e --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420fcbd1cf6111c1c3bd7eaeeb5814538250dc9a68733a5193fdb44a58c32877 +size 6846658 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_4.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5a745a277bfd8d491e31fd015f25694038c81b96 --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dcf46fcbad8d50faecf31c5038e78e2cfdd5d1e240c325a3ac1a94b91f2fe26 +size 7717804 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_5.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..568c357795316aa03855939eacde0260177b5b02 --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-web_nlg_en_PALM_prompt_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e951ccae8b13539b780626b22d255abcf5d36c0bfa575012df3bd7e9a22c51f +size 8544997 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_0.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8b703415b18d40cbb4cf866abaab4a8945927853 --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8938678abc9f1506ea2cdd667a391532433dabfe1db5654fe65f43d03872804a +size 7617300 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_1.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0fd3d5ce08f52f8168c89fcc866a79abcd779438 --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25593faf54fbbcfbb90041857d295e20972dad75a72a17cb25e5fdeec8a54c74 +size 13334090 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_2.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..229f1552dd98895faad034a7beb061d41c89305e --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97f8deaa1cee1025a4d2683f15f0bb520827aae9492ea2290f5cc731ecd9c077 +size 18881844 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_3.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7f6f1f57886749af4253ec8396efdb1b2ac0692d --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bceb2c110e61839277b054c351a5466cfec76bcf6772e071b1a5da0cfc70f6b8 +size 24290838 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_4.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..163392d1afb2652048e4fd09d5ab209b1000b3a0 --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22eea1c4a517b2c8c031e0a8641cfab309985a4f9b2470fe7e92e5f26723c7a0 +size 29450702 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_5.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6fd047b578890e11d672cf3770721af0c53503b1 --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_GEM-wiki_lingua_en_tldr_en_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c822eeaed05d482312b2ce30f9ea44699e978f08288b4b9d1c19ab42e7629f27 +size 34794099 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..61859b25647b0efc42dadbd2f2e3396e5e1676f6 --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1f38ca5d0b6b937994f4c014aae39ba22a8cabbf7d93ce61d6bc967b2c60dd7 +size 3619850 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2b447b481ed3ec5f231407bfe556c1064b2964a6 --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e76ee7442ae6914aa1328c2752dd5673bd8a151f326423f9fe144a0c3ecf8e83 +size 4944758 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9fcaf34def353b61a95d4d08bcd4bf4d44f04a9b --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fae63b9101c806ab1ae695336b14c417d0e6e98db28bec24411537b26653cf92 +size 6034067 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..18ba5d333832e83c576d6c79ab75f994915a53c7 --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15fb7c0af0e4905caabc9aac60f23db6a4bc640f82139a33c02462a4fbb40afb +size 7094817 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..178d49ded2d4af5151c64dd829af2599f9351d8a --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936d2e6be7092cf60c76ceeec7ac3401b2df27ae786682c7685bf7fb159a3b29 +size 8160335 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..cfc8eda62d7bf0a89d188405b811647558af226c --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:830a4d8730e468cf2d1329fe7b7effb25abc1f4a61803050401c51ad3f4ef51d +size 9240760 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_0.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..67a4fb92cffbb0532001330aadf07771fa53329a --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7482e975735f10506ff8d4ec932fcd7ce8de00e6c7e75a4e31c81f846e0ae32d +size 2839032 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_1.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..664e129beabb300bf58daea6ab39f3c23f3454b1 --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c69df21148bbef27784407e3f51689daafd81fd4fe6a9b3fc9fdb9cf25ef1426 +size 5109075 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_2.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8dd992cae852b7ae807f17ee610329b12d9a371f --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4c9816074ec070166443fb9fbcb9a90cb274026d06fc107e19baa0373c8e7a2 +size 7385031 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_3.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8b421524735f7fbc7d7c74c1b5db5d9c6d296b52 --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5931feaa0a906b177842e8703bc360f807cb7af61b5415557ab99cee7f2e009e +size 9652940 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_4.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0e9053f1ee553079f70e015ba9d9a7200adc6679 --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4612f8479f8e8107c34ecd6e770df096d62d29f161a2d39b24e43c21019157ff +size 11675302 diff --git a/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_5.jsonl b/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..127e513f280d28f8cca7b755e28963d311d00705 --- /dev/null +++ b/83m20b1b5/evaluation/generation/examples.83m20b1b5_gem_xsum_article_DOC_summary_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e5625a7ed871c76c66ac22f3baa9a0e260fa885d532b7bcceb155b469828ae +size 13897620 diff --git a/83m20b1b5/evaluation/generation/merged.csv b/83m20b1b5/evaluation/generation/merged.csv new file mode 100644 index 0000000000000000000000000000000000000000..6579c8db9d0b6e0debe29d33a73b0da6910c047b --- /dev/null +++ b/83m20b1b5/evaluation/generation/merged.csv @@ -0,0 +1,53 @@ +dataset,fewshots,prompt,metric,value +e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,0.0 +e2e_nlg_cleaned,0,median,rouge2_fmeasure,0.0 +e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.06747783829778976 +e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.06747783829778976 +e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.07189745808513573 +e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.07189745808513573 +e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.06712596107139872 +e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.06712596107139872 +e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.07278484255401799 +e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.07278484255401799 +e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.07298455264569846 +e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.07298455264569846 +e2e_nlg_cleaned,5,average,multiple,0.05871177544234011 +gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.023009362416004837 +gem_xsum,0,median,rouge2_fmeasure,0.023009362416004837 +gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.017857255626480974 +gem_xsum,1,median,rouge2_fmeasure,0.017857255626480974 +gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.017297006731733434 +gem_xsum,2,median,rouge2_fmeasure,0.017297006731733434 +gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.017911502606647596 +gem_xsum,3,median,rouge2_fmeasure,0.017911502606647596 +gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.005875450428271828 +gem_xsum,4,median,rouge2_fmeasure,0.005875450428271828 +gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.0008079947232119309 +gem_xsum,5,median,rouge2_fmeasure,0.0008079947232119309 +gem_xsum,5,average,multiple,0.013793095422058433 +web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.041199412710563856 +web_nlg_en,0,median,rouge2_fmeasure,0.041199412710563856 +web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.033033827764874776 +web_nlg_en,1,median,rouge2_fmeasure,0.033033827764874776 +web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.030311839537456788 +web_nlg_en,2,median,rouge2_fmeasure,0.030311839537456788 +web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.03145858340503504 +web_nlg_en,3,median,rouge2_fmeasure,0.03145858340503504 +web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.03177157501076817 +web_nlg_en,4,median,rouge2_fmeasure,0.03177157501076817 +web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.03141214421193687 +web_nlg_en,5,median,rouge2_fmeasure,0.03141214421193687 +web_nlg_en,5,average,multiple,0.03319789710677258 +wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.014076128249375142 +wiki_lingua_en,0,median,rouge2_fmeasure,0.014076128249375142 +wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.009158861118954552 +wiki_lingua_en,1,median,rouge2_fmeasure,0.009158861118954552 +wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.008854865373978756 +wiki_lingua_en,2,median,rouge2_fmeasure,0.008854865373978756 +wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.009992333874941724 +wiki_lingua_en,3,median,rouge2_fmeasure,0.009992333874941724 +wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.004391769717086789 +wiki_lingua_en,4,median,rouge2_fmeasure,0.004391769717086789 +wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.0010137961495631932 +wiki_lingua_en,5,median,rouge2_fmeasure,0.0010137961495631932 +wiki_lingua_en,5,average,multiple,0.007914625747316693 diff --git a/83m20b1b5/evaluation/generation/merged.json b/83m20b1b5/evaluation/generation/merged.json new file mode 100644 index 0000000000000000000000000000000000000000..cc3babd176e02b06fde7baaeee4b9de75baaaacb --- /dev/null +++ b/83m20b1b5/evaluation/generation/merged.json @@ -0,0 +1 @@ +{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.22143394507810327, "bleu_stderr": 0.02465792463448575, "rouge1_fmeasure": 0.08800136491523618, "rouge1_fmeasure_stderr": 0.0016491169576186293, "rouge1_precision": 0.057248981329799054, "rouge1_precision_stderr": 0.001192612421597827, "rouge1_recall": 0.24543061097061072, "rouge1_recall_stderr": 0.003908026836015179, "rouge2_fmeasure": 0.041199412710563856, "rouge2_fmeasure_stderr": 0.0010517042389501993, "rouge2_precision": 0.026714899334789086, "rouge2_precision_stderr": 0.0007442762398954055, "rouge2_recall": 0.1178094552104916, "rouge2_recall_stderr": 0.0027477564434458214, "rougeL_fmeasure": 0.08560059630016492, "rougeL_fmeasure_stderr": 0.0015823553571084356, "rougeL_precision": 0.055526615857988376, "rougeL_precision_stderr": 0.001130839530057266, "rougeL_recall": 0.24063012834551578, "rougeL_recall_stderr": 0.003857572393248235, "rougeLsum_fmeasure": 0.08463176069741568, "rougeLsum_fmeasure_stderr": 0.001556056466058389, "rougeLsum_precision": 0.054979572657230524, "rougeLsum_precision_stderr": 0.0011215172613670027, "rougeLsum_recall": 0.23729393562344675, "rougeLsum_recall_stderr": 0.003728197228303874}}, "1": {"PALM_prompt": {"bleu": 0.1624501575699319, "bleu_stderr": 0.02485001075950609, "rouge1_fmeasure": 0.07652804993972759, "rouge1_fmeasure_stderr": 0.0015509513452004412, "rouge1_precision": 0.04976906607582228, "rouge1_precision_stderr": 0.001223285628673813, "rouge1_recall": 0.22912868657496024, "rouge1_recall_stderr": 0.004000208428173092, "rouge2_fmeasure": 0.033033827764874776, "rouge2_fmeasure_stderr": 0.0009553089639751551, "rouge2_precision": 0.021280843722581117, "rouge2_precision_stderr": 0.0006558396664738548, "rouge2_recall": 0.09698896856082298, "rouge2_recall_stderr": 0.0026302797431824575, "rougeL_fmeasure": 0.0749498233935733, "rougeL_fmeasure_stderr": 0.001512700175941009, "rougeL_precision": 0.048710096616471736, "rougeL_precision_stderr": 0.0011961828433359935, "rougeL_recall": 0.22437608864144468, "rougeL_recall_stderr": 0.003906105502350576, "rougeLsum_fmeasure": 0.0738050333776553, "rougeLsum_fmeasure_stderr": 0.0015085683805012107, "rougeLsum_precision": 0.04804749736676356, "rougeLsum_precision_stderr": 0.0011967224986575548, "rougeLsum_recall": 0.219947744667869, "rougeLsum_recall_stderr": 0.0038099663633966267}}, "2": {"PALM_prompt": {"bleu": 0.1339593574453344, "bleu_stderr": 0.011475928272299842, "rouge1_fmeasure": 0.07387997790765682, "rouge1_fmeasure_stderr": 0.001583264492597526, "rouge1_precision": 0.049361448132339274, "rouge1_precision_stderr": 0.001239016812669748, "rouge1_recall": 0.21230159325238288, "rouge1_recall_stderr": 0.003778690654328066, "rouge2_fmeasure": 0.030311839537456788, "rouge2_fmeasure_stderr": 0.0009405456802500011, "rouge2_precision": 0.01991718794966518, "rouge2_precision_stderr": 0.0006694648433471455, "rouge2_recall": 0.08574459105462398, "rouge2_recall_stderr": 0.002452796936423602, "rougeL_fmeasure": 0.07186428635373836, "rougeL_fmeasure_stderr": 0.0015257825086432422, "rougeL_precision": 0.04761539861338694, "rougeL_precision_stderr": 0.0011459311415354199, "rougeL_recall": 0.20795605555159263, "rougeL_recall_stderr": 0.0037279876892756243, "rougeLsum_fmeasure": 0.07095585503189342, "rougeLsum_fmeasure_stderr": 0.0015280634049346952, "rougeLsum_precision": 0.047200993059258047, "rougeLsum_precision_stderr": 0.0011619201330870998, "rougeLsum_recall": 0.20380441991273304, "rougeLsum_recall_stderr": 0.0036356400663095085}}, "3": {"PALM_prompt": {"bleu": 0.1445093157682755, "bleu_stderr": 0.011680141740973857, "rouge1_fmeasure": 0.07753925599964062, "rouge1_fmeasure_stderr": 0.0016513114634372, "rouge1_precision": 0.05381940117828338, "rouge1_precision_stderr": 0.0014712457375755592, "rouge1_recall": 0.209465867149318, "rouge1_recall_stderr": 0.003670571567082913, "rouge2_fmeasure": 0.03145858340503504, "rouge2_fmeasure_stderr": 0.0009713899445453562, "rouge2_precision": 0.02129680401204643, "rouge2_precision_stderr": 0.0007905560723139065, "rouge2_recall": 0.08624541141502207, "rouge2_recall_stderr": 0.0025045432057736777, "rougeL_fmeasure": 0.07430631587097808, "rougeL_fmeasure_stderr": 0.0015425867113632366, "rougeL_precision": 0.051085987724314566, "rougeL_precision_stderr": 0.0013473173376081658, "rougeL_recall": 0.2034637506343861, "rougeL_recall_stderr": 0.003594440263896121, "rougeLsum_fmeasure": 0.07438757103390763, "rougeLsum_fmeasure_stderr": 0.0015672563362475736, "rougeLsum_precision": 0.05151060744188861, "rougeLsum_precision_stderr": 0.00139386344479781, "rougeLsum_recall": 0.20131756184017846, "rougeLsum_recall_stderr": 0.003542645648092287}}, "4": {"PALM_prompt": {"bleu": 0.15397676959746612, "bleu_stderr": 0.020319972800181465, "rouge1_fmeasure": 0.07895932583644215, "rouge1_fmeasure_stderr": 0.0016974724661250095, "rouge1_precision": 0.05495084768413863, "rouge1_precision_stderr": 0.0015206432899508664, "rouge1_recall": 0.20901533932464228, "rouge1_recall_stderr": 0.003765937704948867, "rouge2_fmeasure": 0.03177157501076817, "rouge2_fmeasure_stderr": 0.001014427068722938, "rouge2_precision": 0.021305782148674943, "rouge2_precision_stderr": 0.0007895791251539496, "rouge2_recall": 0.08650908135775882, "rouge2_recall_stderr": 0.002511055892649366, "rougeL_fmeasure": 0.07544181272417243, "rougeL_fmeasure_stderr": 0.0015595589465977734, "rougeL_precision": 0.05192806645015237, "rougeL_precision_stderr": 0.0013573191512714553, "rougeL_recall": 0.20293110085708246, "rougeL_recall_stderr": 0.0036649332304372496, "rougeLsum_fmeasure": 0.0753990370818791, "rougeLsum_fmeasure_stderr": 0.0015858190086455858, "rougeLsum_precision": 0.05227045530331481, "rougeLsum_precision_stderr": 0.0014172788365244952, "rougeLsum_recall": 0.20101071027811684, "rougeLsum_recall_stderr": 0.0036165987277269767}}, "5": {"PALM_prompt": {"bleu": 0.12365669611693078, "bleu_stderr": 0.016819858728710592, "rouge1_fmeasure": 0.08126967062478106, "rouge1_fmeasure_stderr": 0.0016893296748050316, "rouge1_precision": 0.058162947754682705, "rouge1_precision_stderr": 0.001603455708712496, "rouge1_recall": 0.20644138216593572, "rouge1_recall_stderr": 0.0037060630938948063, "rouge2_fmeasure": 0.03141214421193687, "rouge2_fmeasure_stderr": 0.0009319810101283329, "rouge2_precision": 0.021276187110273764, "rouge2_precision_stderr": 0.0007384305971847312, "rouge2_recall": 0.08615141744067409, "rouge2_recall_stderr": 0.00244568873592459, "rougeL_fmeasure": 0.07745748771292646, "rougeL_fmeasure_stderr": 0.0015582261760097003, "rougeL_precision": 0.054616367607262496, "rougeL_precision_stderr": 0.0014155344507793685, "rougeL_recall": 0.20085438051037754, "rougeL_recall_stderr": 0.0036562516852884095, "rougeLsum_fmeasure": 0.07768139042647253, "rougeLsum_fmeasure_stderr": 0.0015879259334824365, "rougeLsum_precision": 0.055143757729757215, "rougeLsum_precision_stderr": 0.0014690987760715066, "rougeLsum_recall": 0.19951563840190698, "rougeLsum_recall_stderr": 0.003620210297773779}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 0.6881722062318003, "bleu_stderr": 0.03492651341120409, "rouge1_fmeasure": 0.10294268147642652, "rouge1_fmeasure_stderr": 0.0015885949612612785, "rouge1_precision": 0.08834158708210425, "rouge1_precision_stderr": 0.0015422494801128619, "rouge1_recall": 0.1511240455748458, "rouge1_recall_stderr": 0.0023614082923857573, "rouge2_fmeasure": 0.014076128249375142, "rouge2_fmeasure_stderr": 0.0005452085443492553, "rouge2_precision": 0.011969635451153749, "rouge2_precision_stderr": 0.0005014249403571861, "rouge2_recall": 0.022088097449328107, "rouge2_recall_stderr": 0.0009513551465069334, "rougeL_fmeasure": 0.0900445479921608, "rougeL_fmeasure_stderr": 0.0012930646444676347, "rougeL_precision": 0.07677846062587532, "rougeL_precision_stderr": 0.001243062818101287, "rougeL_recall": 0.1339169734430534, "rougeL_recall_stderr": 0.0020383966006715017, "rougeLsum_fmeasure": 0.0955398760776783, "rougeLsum_fmeasure_stderr": 0.0014590638597025002, "rougeLsum_precision": 0.08184673815697002, "rougeLsum_precision_stderr": 0.0014145060460416376, "rougeLsum_recall": 0.14081489800530161, "rougeLsum_recall_stderr": 0.0021974572973938104}}, "1": {"tldr_en": {"bleu": 0.5189014506992261, "bleu_stderr": 0.037670534196124573, "rouge1_fmeasure": 0.1079792012109596, "rouge1_fmeasure_stderr": 0.0014252549449545956, "rouge1_precision": 0.09635338878713894, "rouge1_precision_stderr": 0.001429634043206871, "rouge1_recall": 0.15055746081916005, "rouge1_recall_stderr": 0.0020409816175998956, "rouge2_fmeasure": 0.009158861118954552, "rouge2_fmeasure_stderr": 0.000440640430879755, "rouge2_precision": 0.00790430405049511, "rouge2_precision_stderr": 0.0003845584524229758, "rouge2_recall": 0.013584133206075426, "rouge2_recall_stderr": 0.0007558234415691987, "rougeL_fmeasure": 0.08402340037468532, "rougeL_fmeasure_stderr": 0.0009726524560201465, "rougeL_precision": 0.0746787940850166, "rougeL_precision_stderr": 0.0009973389376698493, "rougeL_recall": 0.11936143354392323, "rougeL_recall_stderr": 0.0015295311911311558, "rougeLsum_fmeasure": 0.10206113782881272, "rougeLsum_fmeasure_stderr": 0.0013176698304838254, "rougeLsum_precision": 0.09102641527702295, "rougeLsum_precision_stderr": 0.0013304985489080228, "rougeLsum_recall": 0.14265063103150583, "rougeLsum_recall_stderr": 0.0018998082887351517}}, "2": {"tldr_en": {"bleu": 0.5364516905010515, "bleu_stderr": 0.030040870725624152, "rouge1_fmeasure": 0.10582795070072855, "rouge1_fmeasure_stderr": 0.0013794712845124977, "rouge1_precision": 0.10029653295090936, "rouge1_precision_stderr": 0.0014946374809932353, "rouge1_recall": 0.14402180107303292, "rouge1_recall_stderr": 0.0020779782644098654, "rouge2_fmeasure": 0.008854865373978756, "rouge2_fmeasure_stderr": 0.00042957921880979654, "rouge2_precision": 0.008057008165996204, "rouge2_precision_stderr": 0.00040900296745959225, "rouge2_recall": 0.013786749016989792, "rouge2_recall_stderr": 0.000854235224150827, "rougeL_fmeasure": 0.08413411350451327, "rougeL_fmeasure_stderr": 0.0009822040615333329, "rougeL_precision": 0.07960000286602568, "rougeL_precision_stderr": 0.0011035510489881863, "rougeL_recall": 0.11668978653923719, "rougeL_recall_stderr": 0.001658242514047369, "rougeLsum_fmeasure": 0.09994328210117027, "rougeLsum_fmeasure_stderr": 0.0012748001140968398, "rougeLsum_precision": 0.09489301023067956, "rougeLsum_precision_stderr": 0.0014010349563147654, "rougeLsum_recall": 0.13620770042084057, "rougeLsum_recall_stderr": 0.001949192702183326}}, "3": {"tldr_en": {"bleu": 0.7125414859944954, "bleu_stderr": 0.041080704322432306, "rouge1_fmeasure": 0.095155817472981, "rouge1_fmeasure_stderr": 0.0015726135414282833, "rouge1_precision": 0.09733870322770075, "rouge1_precision_stderr": 0.001964614418018535, "rouge1_recall": 0.1265177788367841, "rouge1_recall_stderr": 0.002283196052782641, "rouge2_fmeasure": 0.009992333874941724, "rouge2_fmeasure_stderr": 0.0004894216568424037, "rouge2_precision": 0.010579030486092352, "rouge2_precision_stderr": 0.0006361010745718742, "rouge2_recall": 0.01457049887287368, "rouge2_recall_stderr": 0.0008743705196037304, "rougeL_fmeasure": 0.07675731421452539, "rougeL_fmeasure_stderr": 0.0011996056298117226, "rougeL_precision": 0.07848811204409353, "rougeL_precision_stderr": 0.0015634596721503732, "rougeL_recall": 0.10375992972655007, "rougeL_recall_stderr": 0.0018585072141463139, "rougeLsum_fmeasure": 0.08901182636481536, "rougeLsum_fmeasure_stderr": 0.0014509521046499364, "rougeLsum_precision": 0.09111564171943451, "rougeLsum_precision_stderr": 0.0018284817988930863, "rougeLsum_recall": 0.11839833718232995, "rougeLsum_recall_stderr": 0.002115864841628711}}, "4": {"tldr_en": {"bleu": 0.14545889127091902, "bleu_stderr": 0.014044744897310252, "rouge1_fmeasure": 0.03289180000234644, "rouge1_fmeasure_stderr": 0.0013035881201820096, "rouge1_precision": 0.03402776805186772, "rouge1_precision_stderr": 0.0015021562092442141, "rouge1_recall": 0.044996950843626565, "rouge1_recall_stderr": 0.0018847203928613078, "rouge2_fmeasure": 0.004391769717086789, "rouge2_fmeasure_stderr": 0.000363776059433011, "rouge2_precision": 0.004717090436515237, "rouge2_precision_stderr": 0.0004903852378905182, "rouge2_recall": 0.0067399115754938, "rouge2_recall_stderr": 0.000626421604068629, "rougeL_fmeasure": 0.027070581766123904, "rougeL_fmeasure_stderr": 0.0010387205497611551, "rougeL_precision": 0.02808301838439718, "rougeL_precision_stderr": 0.001235285408714272, "rougeL_recall": 0.03778261719394529, "rougeL_recall_stderr": 0.001573162142922227, "rougeLsum_fmeasure": 0.03048627315330421, "rougeLsum_fmeasure_stderr": 0.0012057898408514823, "rougeLsum_precision": 0.03161223415677873, "rougeLsum_precision_stderr": 0.0014058140100163263, "rougeLsum_recall": 0.041743224919272987, "rougeLsum_recall_stderr": 0.0017391700899322268}}, "5": {"tldr_en": {"bleu": 5.704374135069664e-09, "bleu_stderr": 1.7013212140847576e-08, "rouge1_fmeasure": 0.005704280866490329, "rouge1_fmeasure_stderr": 0.0006219180243094649, "rouge1_precision": 0.006781221068179048, "rouge1_precision_stderr": 0.0008370099388644863, "rouge1_recall": 0.007836550901817174, "rouge1_recall_stderr": 0.0009051911934293397, "rouge2_fmeasure": 0.0010137961495631932, "rouge2_fmeasure_stderr": 0.00019029294781685254, "rouge2_precision": 0.0013677198862754726, "rouge2_precision_stderr": 0.0003346680345001492, "rouge2_recall": 0.0016514234570176281, "rouge2_recall_stderr": 0.00037191127816620903, "rougeL_fmeasure": 0.004651743548906982, "rougeL_fmeasure_stderr": 0.00048649455410650766, "rougeL_precision": 0.005583704202117562, "rougeL_precision_stderr": 0.0006706928723442628, "rougeL_recall": 0.006455316117651287, "rougeL_recall_stderr": 0.0007271642666562989, "rougeLsum_fmeasure": 0.00532823887838008, "rougeLsum_fmeasure_stderr": 0.0005738794568348857, "rougeLsum_precision": 0.006405915344244336, "rougeLsum_precision_stderr": 0.0007905688564967488, "rougeLsum_recall": 0.007287098704531648, "rougeLsum_recall_stderr": 0.0008304733265241732}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.0, "bleu_stderr": 0.0, "rouge1_fmeasure": 0.0, "rouge1_fmeasure_stderr": 0.0, "rouge1_precision": 0.0, "rouge1_precision_stderr": 0.0, "rouge1_recall": 0.0, "rouge1_recall_stderr": 0.0, "rouge2_fmeasure": 0.0, "rouge2_fmeasure_stderr": 0.0, "rouge2_precision": 0.0, "rouge2_precision_stderr": 0.0, "rouge2_recall": 0.0, "rouge2_recall_stderr": 0.0, "rougeL_fmeasure": 0.0, "rougeL_fmeasure_stderr": 0.0, "rougeL_precision": 0.0, "rougeL_precision_stderr": 0.0, "rougeL_recall": 0.0, "rougeL_recall_stderr": 0.0, "rougeLsum_fmeasure": 0.0, "rougeLsum_fmeasure_stderr": 0.0, "rougeLsum_precision": 0.0, "rougeLsum_precision_stderr": 0.0, "rougeLsum_recall": 0.0, "rougeLsum_recall_stderr": 0.0}}, "1": {"generate_text_restaurant": {"bleu": 2.6728852439569457, "bleu_stderr": 0.09065446424435947, "rouge1_fmeasure": 0.23072445045321552, "rouge1_fmeasure_stderr": 0.002443871760192078, "rouge1_precision": 0.3933396522350936, "rouge1_precision_stderr": 0.005148320230640675, "rouge1_recall": 0.20775934709343202, "rouge1_recall_stderr": 0.0026260859064331757, "rouge2_fmeasure": 0.06747783829778976, "rouge2_fmeasure_stderr": 0.0013159166329732434, "rouge2_precision": 0.1938587834482223, "rouge2_precision_stderr": 0.005729810617300909, "rouge2_recall": 0.05837069594557587, "rouge2_recall_stderr": 0.0012544212946420665, "rougeL_fmeasure": 0.17588620890908344, "rougeL_fmeasure_stderr": 0.0018377940107973307, "rougeL_precision": 0.32691601232449063, "rougeL_precision_stderr": 0.005208463887679732, "rougeL_recall": 0.1560034121824963, "rougeL_recall_stderr": 0.0019515494752455444, "rougeLsum_fmeasure": 0.19419672329958557, "rougeLsum_fmeasure_stderr": 0.002091472191519842, "rougeLsum_precision": 0.34888738755497656, "rougeLsum_precision_stderr": 0.005205421554401587, "rougeLsum_recall": 0.17325308245248913, "rougeLsum_recall_stderr": 0.00221766186426316}}, "2": {"generate_text_restaurant": {"bleu": 2.6759691267622747, "bleu_stderr": 0.05152927781934443, "rouge1_fmeasure": 0.24046253818668323, "rouge1_fmeasure_stderr": 0.00234658072085576, "rouge1_precision": 0.43498335940641936, "rouge1_precision_stderr": 0.005452965227789697, "rouge1_recall": 0.21613264334760326, "rouge1_recall_stderr": 0.0026224518654289113, "rouge2_fmeasure": 0.07189745808513573, "rouge2_fmeasure_stderr": 0.0012924257721161703, "rouge2_precision": 0.23691715770918176, "rouge2_precision_stderr": 0.006378708979164285, "rouge2_recall": 0.06142971373009135, "rouge2_recall_stderr": 0.0012573424940348463, "rougeL_fmeasure": 0.18141453249435047, "rougeL_fmeasure_stderr": 0.0017217485277798694, "rougeL_precision": 0.36302611905661375, "rougeL_precision_stderr": 0.005643736562173541, "rougeL_recall": 0.16024970690926627, "rougeL_recall_stderr": 0.001911462317362814, "rougeLsum_fmeasure": 0.20045453712563308, "rougeLsum_fmeasure_stderr": 0.001974909674126333, "rougeLsum_precision": 0.3860721614508426, "rougeLsum_precision_stderr": 0.005592828702084638, "rougeLsum_recall": 0.17828624080991215, "rougeLsum_recall_stderr": 0.0021858213523899077}}, "3": {"generate_text_restaurant": {"bleu": 2.350697048132632, "bleu_stderr": 0.16368662036445195, "rouge1_fmeasure": 0.22300242392840638, "rouge1_fmeasure_stderr": 0.002500676214947681, "rouge1_precision": 0.42321061678790745, "rouge1_precision_stderr": 0.005787468869047046, "rouge1_recall": 0.19856438121204104, "rouge1_recall_stderr": 0.0027009490378613094, "rouge2_fmeasure": 0.06712596107139872, "rouge2_fmeasure_stderr": 0.0012794760647159404, "rouge2_precision": 0.23874008605296593, "rouge2_precision_stderr": 0.006530181171757951, "rouge2_recall": 0.056759103167357966, "rouge2_recall_stderr": 0.0012360584167915294, "rougeL_fmeasure": 0.16938082876445262, "rougeL_fmeasure_stderr": 0.00185869633377821, "rougeL_precision": 0.35806559522674963, "rougeL_precision_stderr": 0.0059374591369078114, "rougeL_recall": 0.1476711204601984, "rougeL_recall_stderr": 0.0019603780846806818, "rougeLsum_fmeasure": 0.18678713987354215, "rougeLsum_fmeasure_stderr": 0.002106687381417484, "rougeLsum_precision": 0.37882347750269213, "rougeLsum_precision_stderr": 0.005890887100641854, "rougeLsum_recall": 0.1642557288057488, "rougeLsum_recall_stderr": 0.002242539346776154}}, "4": {"generate_text_restaurant": {"bleu": 2.3383066506175627, "bleu_stderr": 0.08147359581654341, "rouge1_fmeasure": 0.2230383375802249, "rouge1_fmeasure_stderr": 0.0024298858572107357, "rouge1_precision": 0.4743498123819304, "rouge1_precision_stderr": 0.006154240614260249, "rouge1_recall": 0.1934023407385226, "rouge1_recall_stderr": 0.00264806491029735, "rouge2_fmeasure": 0.07278484255401799, "rouge2_fmeasure_stderr": 0.0013293808485782847, "rouge2_precision": 0.29735568863767275, "rouge2_precision_stderr": 0.007172641535922273, "rouge2_recall": 0.05883954698769347, "rouge2_recall_stderr": 0.0012351467645497919, "rougeL_fmeasure": 0.17373747802751602, "rougeL_fmeasure_stderr": 0.0018227045337711142, "rougeL_precision": 0.4120893239123253, "rougeL_precision_stderr": 0.006381648620379837, "rougeL_recall": 0.14689746853919977, "rougeL_recall_stderr": 0.001923510153311473, "rougeLsum_fmeasure": 0.18953165573765662, "rougeLsum_fmeasure_stderr": 0.00204552521574545, "rougeLsum_precision": 0.4316295979128506, "rougeLsum_precision_stderr": 0.006311549546947088, "rougeLsum_recall": 0.16210256786021354, "rougeLsum_recall_stderr": 0.0021994593339633196}}, "5": {"generate_text_restaurant": {"bleu": 2.115844671738753, "bleu_stderr": 0.09287010536431305, "rouge1_fmeasure": 0.22070056495356907, "rouge1_fmeasure_stderr": 0.002412659551940779, "rouge1_precision": 0.49372599728222144, "rouge1_precision_stderr": 0.006303863309241805, "rouge1_recall": 0.1893486587365343, "rouge1_recall_stderr": 0.0026284056137824206, "rouge2_fmeasure": 0.07298455264569846, "rouge2_fmeasure_stderr": 0.0012981277980822924, "rouge2_precision": 0.322127699190832, "rouge2_precision_stderr": 0.0074386563285679, "rouge2_recall": 0.05810478895862701, "rouge2_recall_stderr": 0.0012167049976553911, "rougeL_fmeasure": 0.17291135224561338, "rougeL_fmeasure_stderr": 0.0018098077726075982, "rougeL_precision": 0.43350933132635133, "rougeL_precision_stderr": 0.0065815685602999266, "rougeL_recall": 0.14446877213741532, "rougeL_recall_stderr": 0.0019188306832565957, "rougeLsum_fmeasure": 0.18736857411665434, "rougeLsum_fmeasure_stderr": 0.0020217436017845682, "rougeLsum_precision": 0.45167492487828215, "rougeLsum_precision_stderr": 0.006510688665251907, "rougeLsum_recall": 0.15821178190359975, "rougeLsum_recall_stderr": 0.00216908695004573}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 0.9122699611037882, "bleu_stderr": 0.06570902741835642, "rouge1_fmeasure": 0.13453387679982023, "rouge1_fmeasure_stderr": 0.002137215663359666, "rouge1_precision": 0.09617233932550984, "rouge1_precision_stderr": 0.0015968573233213028, "rouge1_recall": 0.2360757694545423, "rouge1_recall_stderr": 0.00370387203918051, "rouge2_fmeasure": 0.023009362416004837, "rouge2_fmeasure_stderr": 0.0010848983621226595, "rouge2_precision": 0.016345450182953684, "rouge2_precision_stderr": 0.0007889657019727539, "rouge2_recall": 0.040945287238629884, "rouge2_recall_stderr": 0.0018815137332123609, "rougeL_fmeasure": 0.11696204362015787, "rougeL_fmeasure_stderr": 0.001739862040807931, "rougeL_precision": 0.08358012787593269, "rougeL_precision_stderr": 0.0013109418670182917, "rougeL_recall": 0.20600731487938223, "rougeL_recall_stderr": 0.003075514228900137, "rougeLsum_fmeasure": 0.10393138760084052, "rougeLsum_fmeasure_stderr": 0.0016974542494408457, "rougeLsum_precision": 0.074261757400661, "rougeLsum_precision_stderr": 0.0012755865913296775, "rougeLsum_recall": 0.18350710093496767, "rougeLsum_recall_stderr": 0.003027114522321686}}, "1": {"article_DOC_summary": {"bleu": 0.6934236139159788, "bleu_stderr": 0.055189833618672024, "rouge1_fmeasure": 0.11916742202289989, "rouge1_fmeasure_stderr": 0.0020441367601762984, "rouge1_precision": 0.08459268678603156, "rouge1_precision_stderr": 0.0015063922805013791, "rouge1_recall": 0.21062699869544751, "rouge1_recall_stderr": 0.0035228860393077184, "rouge2_fmeasure": 0.017857255626480974, "rouge2_fmeasure_stderr": 0.0010224310064310912, "rouge2_precision": 0.012597930749107738, "rouge2_precision_stderr": 0.0007377841672586165, "rouge2_recall": 0.031999643834912925, "rouge2_recall_stderr": 0.0017919904739554316, "rougeL_fmeasure": 0.10734992521262915, "rougeL_fmeasure_stderr": 0.001730288249458564, "rougeL_precision": 0.07611417379359779, "rougeL_precision_stderr": 0.0012742619154937661, "rougeL_recall": 0.19044265188596107, "rougeL_recall_stderr": 0.0030309210607359196, "rougeLsum_fmeasure": 0.09212215013971556, "rougeLsum_fmeasure_stderr": 0.0015951047932129575, "rougeLsum_precision": 0.06528504699855653, "rougeLsum_precision_stderr": 0.001169674218531962, "rougeLsum_recall": 0.16387535729166744, "rougeLsum_recall_stderr": 0.0028298209473616265}}, "2": {"article_DOC_summary": {"bleu": 0.6570261539221618, "bleu_stderr": 0.06457679953165572, "rouge1_fmeasure": 0.11714486547191727, "rouge1_fmeasure_stderr": 0.0019721020649582028, "rouge1_precision": 0.08310989226757869, "rouge1_precision_stderr": 0.0014496423333643577, "rouge1_recall": 0.2077051554329215, "rouge1_recall_stderr": 0.0034244055888820804, "rouge2_fmeasure": 0.017297006731733434, "rouge2_fmeasure_stderr": 0.0009663380265457396, "rouge2_precision": 0.012193809590115543, "rouge2_precision_stderr": 0.0006986539554457616, "rouge2_recall": 0.031175575241520804, "rouge2_recall_stderr": 0.0017093872902993448, "rougeL_fmeasure": 0.10673163556701443, "rougeL_fmeasure_stderr": 0.001683180060909063, "rougeL_precision": 0.075661760756666, "rougeL_precision_stderr": 0.001238392153577493, "rougeL_recall": 0.18972091925552043, "rougeL_recall_stderr": 0.0029483233194428997, "rougeLsum_fmeasure": 0.08969158968905694, "rougeLsum_fmeasure_stderr": 0.0015132067354959346, "rougeLsum_precision": 0.06352129242042852, "rougeLsum_precision_stderr": 0.0011102051150740533, "rougeLsum_recall": 0.16014108387182785, "rougeLsum_recall_stderr": 0.0026842238281168103}}, "3": {"article_DOC_summary": {"bleu": 0.7422613790495289, "bleu_stderr": 0.06514723413231459, "rouge1_fmeasure": 0.11594351096508414, "rouge1_fmeasure_stderr": 0.0020790775255337675, "rouge1_precision": 0.08501067090237822, "rouge1_precision_stderr": 0.0017615025396123855, "rouge1_recall": 0.20134376204343213, "rouge1_recall_stderr": 0.0035158650629011513, "rouge2_fmeasure": 0.017911502606647596, "rouge2_fmeasure_stderr": 0.000985564236124104, "rouge2_precision": 0.013112083342324879, "rouge2_precision_stderr": 0.0007544397415908959, "rouge2_recall": 0.03158394076946137, "rouge2_recall_stderr": 0.001687518797011113, "rougeL_fmeasure": 0.10646367058425908, "rougeL_fmeasure_stderr": 0.001810498716362027, "rougeL_precision": 0.07777247652070679, "rougeL_precision_stderr": 0.001507255909258555, "rougeL_recall": 0.18553333666910865, "rougeL_recall_stderr": 0.003100840457412398, "rougeLsum_fmeasure": 0.08910829543273951, "rougeLsum_fmeasure_stderr": 0.0016024522291310029, "rougeLsum_precision": 0.06541409584763817, "rougeLsum_precision_stderr": 0.001407070467981226, "rougeLsum_recall": 0.15558136711822532, "rougeLsum_recall_stderr": 0.0027429242918312374}}, "4": {"article_DOC_summary": {"bleu": 0.45434695424591715, "bleu_stderr": 0.10483639531272293, "rouge1_fmeasure": 0.03408594165345016, "rouge1_fmeasure_stderr": 0.0020050874444915642, "rouge1_precision": 0.031243353659348713, "rouge1_precision_stderr": 0.0022406794856341195, "rouge1_recall": 0.052779876794259606, "rouge1_recall_stderr": 0.0031003687375510277, "rouge2_fmeasure": 0.005875450428271828, "rouge2_fmeasure_stderr": 0.0006460157370222755, "rouge2_precision": 0.0054699172104788465, "rouge2_precision_stderr": 0.001017227653179516, "rouge2_recall": 0.00935841193125638, "rouge2_recall_stderr": 0.0010245180706557962, "rougeL_fmeasure": 0.03028975918570269, "rougeL_fmeasure_stderr": 0.0017390916872331722, "rougeL_precision": 0.0275752093040082, "rougeL_precision_stderr": 0.0019600165937700346, "rougeL_recall": 0.04754293835542539, "rougeL_recall_stderr": 0.0027737773827495025, "rougeLsum_fmeasure": 0.026508905866873923, "rougeLsum_fmeasure_stderr": 0.0015700802469097192, "rougeLsum_precision": 0.024866785974796596, "rougeLsum_precision_stderr": 0.0018932649328032359, "rougeLsum_recall": 0.041138743222902195, "rougeLsum_recall_stderr": 0.0024674237330471085}}, "5": {"article_DOC_summary": {"bleu": 3.293954829261569e-37, "bleu_stderr": 4.816123484116706e-31, "rouge1_fmeasure": 0.002892090360931839, "rouge1_fmeasure_stderr": 0.0008332937221299973, "rouge1_precision": 0.003204464726749907, "rouge1_precision_stderr": 0.0009476798992127986, "rouge1_recall": 0.0027101036549158557, "rouge1_recall_stderr": 0.0007645441485834611, "rouge2_fmeasure": 0.0008079947232119309, "rouge2_fmeasure_stderr": 0.0003898181285980445, "rouge2_precision": 0.0009424152737138307, "rouge2_precision_stderr": 0.0004633862018340062, "rouge2_recall": 0.0007161109606491931, "rouge2_recall_stderr": 0.000341349884950177, "rougeL_fmeasure": 0.0022266454683835834, "rougeL_fmeasure_stderr": 0.0006476197460812914, "rougeL_precision": 0.0024671672869581627, "rougeL_precision_stderr": 0.0007402438440876393, "rougeL_recall": 0.0020939538292462866, "rougeL_recall_stderr": 0.000594696151720948, "rougeLsum_fmeasure": 0.002417399696949334, "rougeLsum_fmeasure_stderr": 0.0007164145255356163, "rougeLsum_precision": 0.00268168331723446, "rougeLsum_precision_stderr": 0.0008163559154327512, "rougeLsum_recall": 0.0022685619916896034, "rougeLsum_recall_stderr": 0.0006585699169545011}}}} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_0.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..f9b9ccdc655c70e35d2d9821a0e251ea23e9ba40 --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.22143394507810327, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.02465792463448575 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.057248981329799054, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001192612421597827 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.24543061097061072, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003908026836015179 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08800136491523618, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016491169576186293 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.026714899334789086, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007442762398954055 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.1178094552104916, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0027477564434458214 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.041199412710563856, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0010517042389501993 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.055526615857988376, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001130839530057266 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.24063012834551578, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003857572393248235 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.08560059630016492, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0015823553571084356 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.054979572657230524, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0011215172613670027 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.23729393562344675, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003728197228303874 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.08463176069741568, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001556056466058389 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_1.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..7222c2304daa2518a8fe00d80f35e78aaf53354f --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.1624501575699319, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.02485001075950609 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.04976906607582228, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001223285628673813 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.22912868657496024, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004000208428173092 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07652804993972759, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015509513452004412 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.021280843722581117, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006558396664738548 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.09698896856082298, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0026302797431824575 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.033033827764874776, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009553089639751551 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.048710096616471736, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011961828433359935 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.22437608864144468, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003906105502350576 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.0749498233935733, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001512700175941009 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04804749736676356, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0011967224986575548 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.219947744667869, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0038099663633966267 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.0738050333776553, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015085683805012107 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_2.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..544019a4ce1b8ce749ff77b431fa2f7c7c856e89 --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.1339593574453344, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.011475928272299842 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.049361448132339274, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001239016812669748 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.21230159325238288, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003778690654328066 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07387997790765682, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001583264492597526 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.01991718794966518, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006694648433471455 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.08574459105462398, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002452796936423602 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.030311839537456788, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009405456802500011 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.04761539861338694, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011459311415354199 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.20795605555159263, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0037279876892756243 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07186428635373836, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0015257825086432422 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.047200993059258047, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0011619201330870998 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.20380441991273304, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0036356400663095085 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07095585503189342, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015280634049346952 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_3.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..1d283d8f96f46f576d2fb5f3c2266599e1332090 --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.1445093157682755, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.011680141740973857 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05381940117828338, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0014712457375755592 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.209465867149318, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003670571567082913 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07753925599964062, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016513114634372 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.02129680401204643, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007905560723139065 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.08624541141502207, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0025045432057736777 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03145858340503504, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009713899445453562 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.051085987724314566, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0013473173376081658 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.2034637506343861, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003594440263896121 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07430631587097808, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0015425867113632366 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.05151060744188861, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.00139386344479781 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.20131756184017846, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003542645648092287 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07438757103390763, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015672563362475736 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_4.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..db459dc37d34175b7bee5c58b2d2ee49fcc66d21 --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.15397676959746612, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.020319972800181465 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05495084768413863, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015206432899508664 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.20901533932464228, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003765937704948867 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07895932583644215, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016974724661250095 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.021305782148674943, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007895791251539496 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.08650908135775882, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002511055892649366 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03177157501076817, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.001014427068722938 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.05192806645015237, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0013573191512714553 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.20293110085708246, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0036649332304372496 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07544181272417243, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0015595589465977734 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.05227045530331481, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014172788365244952 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.20101071027811684, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0036165987277269767 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.0753990370818791, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015858190086455858 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_5.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..c3587ea810f6440ad56fa5b94aa35171742faa75 --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.12365669611693078, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.016819858728710592 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.058162947754682705, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001603455708712496 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.20644138216593572, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0037060630938948063 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08126967062478106, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016893296748050316 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.021276187110273764, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007384305971847312 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.08615141744067409, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00244568873592459 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03141214421193687, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009319810101283329 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.054616367607262496, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0014155344507793685 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.20085438051037754, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0036562516852884095 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07745748771292646, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0015582261760097003 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.055143757729757215, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014690987760715066 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.19951563840190698, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003620210297773779 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07768139042647253, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015879259334824365 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_0.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..614bc49afb236c81a9d510322a8019de7321253f --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.08834158708210425, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015422494801128619 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.1511240455748458, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0023614082923857573 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.10294268147642652, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015885949612612785 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.011969635451153749, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0005014249403571861 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.022088097449328107, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0009513551465069334 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.014076128249375142, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0005452085443492553 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.07677846062587532, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001243062818101287 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.1339169734430534, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0020383966006715017 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.0900445479921608, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0012930646444676347 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.08184673815697002, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014145060460416376 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.14081489800530161, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0021974572973938104 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.0955398760776783, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014590638597025002 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.6881722062318003, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.03492651341120409 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_1.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..28c7d6797ce5bdcb4653d35bc28e036029d72f7f --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.09635338878713894, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001429634043206871 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.15055746081916005, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0020409816175998956 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.1079792012109596, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014252549449545956 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.00790430405049511, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0003845584524229758 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.013584133206075426, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0007558234415691987 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.009158861118954552, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.000440640430879755 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.0746787940850166, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0009973389376698493 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.11936143354392323, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0015295311911311558 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.08402340037468532, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0009726524560201465 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.09102641527702295, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0013304985489080228 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.14265063103150583, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0018998082887351517 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.10206113782881272, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013176698304838254 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.5189014506992261, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.037670534196124573 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_2.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..b2eaa8315312f802abc0e56f58db41ec1fe0a723 --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.10029653295090936, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0014946374809932353 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.14402180107303292, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0020779782644098654 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.10582795070072855, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0013794712845124977 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.008057008165996204, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00040900296745959225 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.013786749016989792, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.000854235224150827 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.008854865373978756, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00042957921880979654 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.07960000286602568, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011035510489881863 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.11668978653923719, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.001658242514047369 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.08413411350451327, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0009822040615333329 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.09489301023067956, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014010349563147654 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.13620770042084057, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.001949192702183326 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.09994328210117027, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0012748001140968398 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.5364516905010515, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.030040870725624152 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_3.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..03d962cf4e77a465b5098af32c557969349cefe0 --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.09733870322770075, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001964614418018535 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.1265177788367841, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002283196052782641 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.095155817472981, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015726135414282833 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.010579030486092352, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006361010745718742 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.01457049887287368, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0008743705196037304 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.009992333874941724, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0004894216568424037 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.07848811204409353, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0015634596721503732 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.10375992972655007, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0018585072141463139 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.07675731421452539, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0011996056298117226 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.09111564171943451, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0018284817988930863 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.11839833718232995, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.002115864841628711 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.08901182636481536, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014509521046499364 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.7125414859944954, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.041080704322432306 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_4.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..e018427ac9508f5ab0cccf514062e94964683e42 --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.03402776805186772, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015021562092442141 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.044996950843626565, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0018847203928613078 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.03289180000234644, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0013035881201820096 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.004717090436515237, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0004903852378905182 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.0067399115754938, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.000626421604068629 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.004391769717086789, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.000363776059433011 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.02808301838439718, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001235285408714272 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.03778261719394529, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.001573162142922227 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.027070581766123904, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0010387205497611551 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.03161223415677873, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014058140100163263 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.041743224919272987, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0017391700899322268 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.03048627315330421, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0012057898408514823 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.14545889127091902, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.014044744897310252 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_5.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..1cab0f141fa8a84d3b00a9fa29072783befb606c --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.006781221068179048, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0008370099388644863 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.007836550901817174, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0009051911934293397 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.005704280866490329, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0006219180243094649 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.0013677198862754726, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0003346680345001492 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.0016514234570176281, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00037191127816620903 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.0010137961495631932, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00019029294781685254 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.005583704202117562, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0006706928723442628 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.006455316117651287, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0007271642666562989 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.004651743548906982, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.00048649455410650766 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.006405915344244336, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0007905688564967488 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.007287098704531648, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0008304733265241732 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.00532823887838008, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0005738794568348857 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 5.704374135069664e-09, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 1.7013212140847576e-08 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..e7b78e86951dd26cff9e87bbc34897e0ed49f713 --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..c3c7b4191ce8d0633d237f387cec93cc0ff66401 --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 2.6728852439569457, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.09065446424435947 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.3933396522350936, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.005148320230640675 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.20775934709343202, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0026260859064331757 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.23072445045321552, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002443871760192078 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.1938587834482223, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.005729810617300909 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.05837069594557587, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0012544212946420665 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.06747783829778976, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0013159166329732434 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.32691601232449063, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.005208463887679732 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.1560034121824963, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0019515494752455444 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.17588620890908344, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018377940107973307 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.34888738755497656, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.005205421554401587 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.17325308245248913, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.00221766186426316 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.19419672329958557, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002091472191519842 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..0b15cee481aea95a4a86228bca97aebed537855b --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 2.6759691267622747, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.05152927781934443 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.43498335940641936, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.005452965227789697 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.21613264334760326, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0026224518654289113 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.24046253818668323, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.00234658072085576 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.23691715770918176, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.006378708979164285 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.06142971373009135, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0012573424940348463 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.07189745808513573, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0012924257721161703 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.36302611905661375, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.005643736562173541 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.16024970690926627, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.001911462317362814 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.18141453249435047, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0017217485277798694 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.3860721614508426, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.005592828702084638 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.17828624080991215, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0021858213523899077 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.20045453712563308, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001974909674126333 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..d77de0d27dcc7fe3716561e49a683f29f2f18863 --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 2.350697048132632, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.16368662036445195 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.42321061678790745, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.005787468869047046 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.19856438121204104, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0027009490378613094 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.22300242392840638, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002500676214947681 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.23874008605296593, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.006530181171757951 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.056759103167357966, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0012360584167915294 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.06712596107139872, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0012794760647159404 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.35806559522674963, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0059374591369078114 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.1476711204601984, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0019603780846806818 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.16938082876445262, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.00185869633377821 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.37882347750269213, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.005890887100641854 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.1642557288057488, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002242539346776154 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.18678713987354215, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002106687381417484 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 0000000000000000000000000000000000000000..7a76e25bb92b5a54137951b6467575ab58c5807b --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 2.3383066506175627, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.08147359581654341 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.4743498123819304, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.006154240614260249 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.1934023407385226, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.00264806491029735 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.2230383375802249, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0024298858572107357 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.29735568863767275, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.007172641535922273 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.05883954698769347, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0012351467645497919 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.07278484255401799, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0013293808485782847 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.4120893239123253, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.006381648620379837 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.14689746853919977, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.001923510153311473 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.17373747802751602, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018227045337711142 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.4316295979128506, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.006311549546947088 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.16210256786021354, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0021994593339633196 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.18953165573765662, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.00204552521574545 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..cc79256c2581c715f4fdb356b20785024750c9e2 --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 2.115844671738753, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.09287010536431305 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.49372599728222144, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.006303863309241805 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.1893486587365343, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0026284056137824206 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.22070056495356907, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002412659551940779 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.322127699190832, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0074386563285679 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.05810478895862701, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0012167049976553911 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.07298455264569846, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0012981277980822924 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.43350933132635133, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0065815685602999266 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.14446877213741532, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0019188306832565957 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.17291135224561338, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018098077726075982 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.45167492487828215, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.006510688665251907 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.15821178190359975, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.00216908695004573 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.18736857411665434, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0020217436017845682 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_0.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..40e18c6008dc1398b45354ec0bcbac5f1713966e --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.09617233932550984, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0015968573233213028 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.2360757694545423, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.00370387203918051 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.13453387679982023, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002137215663359666 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.016345450182953684, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007889657019727539 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.040945287238629884, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0018815137332123609 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.023009362416004837, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0010848983621226595 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.08358012787593269, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0013109418670182917 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.20600731487938223, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.003075514228900137 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.11696204362015787, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001739862040807931 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.074261757400661, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0012755865913296775 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.18350710093496767, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.003027114522321686 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.10393138760084052, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0016974542494408457 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.9122699611037882, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.06570902741835642 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_1.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..6406a6044af6e644b1fb728f07ad506afd9ef1f6 --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08459268678603156, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0015063922805013791 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.21062699869544751, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0035228860393077184 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.11916742202289989, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0020441367601762984 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.012597930749107738, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007377841672586165 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.031999643834912925, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0017919904739554316 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.017857255626480974, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0010224310064310912 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.07611417379359779, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012742619154937661 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.19044265188596107, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0030309210607359196 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.10734992521262915, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001730288249458564 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06528504699855653, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.001169674218531962 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.16387535729166744, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0028298209473616265 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.09212215013971556, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015951047932129575 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.6934236139159788, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.055189833618672024 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_2.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..63fbde078db51fd0da254931b02bcb8f951abf63 --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08310989226757869, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0014496423333643577 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.2077051554329215, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0034244055888820804 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.11714486547191727, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0019721020649582028 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.012193809590115543, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006986539554457616 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.031175575241520804, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0017093872902993448 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.017297006731733434, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0009663380265457396 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.075661760756666, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.001238392153577493 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.18972091925552043, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0029483233194428997 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.10673163556701443, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001683180060909063 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06352129242042852, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0011102051150740533 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.16014108387182785, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0026842238281168103 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.08969158968905694, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015132067354959346 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.6570261539221618, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.06457679953165572 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_3.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..71a7490dd752a955b6c030fbb8e96f1119e2f17c --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08501067090237822, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0017615025396123855 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.20134376204343213, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0035158650629011513 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.11594351096508414, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0020790775255337675 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.013112083342324879, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007544397415908959 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.03158394076946137, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.001687518797011113 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.017911502606647596, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.000985564236124104 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.07777247652070679, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.001507255909258555 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.18553333666910865, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.003100840457412398 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.10646367058425908, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001810498716362027 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06541409584763817, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.001407070467981226 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.15558136711822532, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0027429242918312374 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.08910829543273951, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0016024522291310029 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.7422613790495289, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.06514723413231459 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_4.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..63d3cbb7494ffb7d42deeab9a1de929251b4596c --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.031243353659348713, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0022406794856341195 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.052779876794259606, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0031003687375510277 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.03408594165345016, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0020050874444915642 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.0054699172104788465, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.001017227653179516 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.00935841193125638, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0010245180706557962 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.005875450428271828, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0006460157370222755 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.0275752093040082, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0019600165937700346 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.04754293835542539, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0027737773827495025 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.03028975918570269, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017390916872331722 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.024866785974796596, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0018932649328032359 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.041138743222902195, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0024674237330471085 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.026508905866873923, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015700802469097192 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.45434695424591715, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.10483639531272293 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_5.json b/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..af8f59d7b8c60ece60481d0306c4af8a6d662cbe --- /dev/null +++ b/83m20b1b5/evaluation/generation/slim.83m20b1b5_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.003204464726749907, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0009476798992127986 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.0027101036549158557, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0007645441485834611 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.002892090360931839, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0008332937221299973 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.0009424152737138307, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0004633862018340062 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.0007161109606491931, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.000341349884950177 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.0008079947232119309, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0003898181285980445 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.0024671672869581627, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0007402438440876393 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.0020939538292462866, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.000594696151720948 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.0022266454683835834, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0006476197460812914 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.00268168331723446, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0008163559154327512 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.0022685619916896034, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0006585699169545011 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.002417399696949334, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0007164145255356163 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 3.293954829261569e-37, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 4.816123484116706e-31 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/rankeval/83m20b1b5_0.csv b/83m20b1b5/evaluation/rankeval/83m20b1b5_0.csv new file mode 100644 index 0000000000000000000000000000000000000000..8e63ec92f1871fa394c41863053170829134f39b --- /dev/null +++ b/83m20b1b5/evaluation/rankeval/83m20b1b5_0.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.33,0.01487687202745673,0 +anli_r2,acc,0.34,0.014987482264363937,0 +anli_r3,acc,0.33,0.013579531277800922,0 +arc_challenge,acc,0.1680887372013652,0.010927715046124858,0 +arc_challenge,acc_norm,0.21075085324232082,0.011918271754852197,0 +arc_easy,acc,0.37584175084175087,0.009938436373170623,0 +arc_easy,acc_norm,0.34974747474747475,0.009785578618940735,0 +boolq,acc,0.6076452599388379,0.008539983838167725,1 +cb,acc,0.39285714285714285,0.0658538889806635,1 +cb,f1,0.2798600699650175,,1 +copa,acc,0.55,0.04999999999999999,0 +hellaswag,acc,0.27106154152559253,0.004435993492583859,0 +hellaswag,acc_norm,0.2753435570603465,0.004457743287380272,0 +piqa,acc,0.6126224156692056,0.01136603808343591,0 +piqa,acc_norm,0.6055495103373232,0.011402931101558378,0 +rte,acc,0.5126353790613718,0.030086851767188564,0 +sciq,acc,0.63,0.015275252316519362,0 +sciq,acc_norm,0.561,0.015701131345400774,0 +storycloze_2016,acc,0.5558524853019775,0.011490067784518675,0 +winogrande,acc,0.5153906866614049,0.014045826789783672,0 diff --git a/83m20b1b5/evaluation/rankeval/83m20b1b5_0.json b/83m20b1b5/evaluation/rankeval/83m20b1b5_0.json new file mode 100644 index 0000000000000000000000000000000000000000..acf50ef2e9ad793f87a7a1c50ef51596f70332d4 --- /dev/null +++ b/83m20b1b5/evaluation/rankeval/83m20b1b5_0.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.33, + "acc_stderr": 0.01487687202745673 + }, + "anli_r2": { + "acc": 0.34, + "acc_stderr": 0.014987482264363937 + }, + "anli_r3": { + "acc": 0.33, + "acc_stderr": 0.013579531277800922 + }, + "cb": { + "acc": 0.39285714285714285, + "acc_stderr": 0.0658538889806635, + "f1": 0.2798600699650175 + }, + "copa": { + "acc": 0.55, + "acc_stderr": 0.04999999999999999 + }, + "hellaswag": { + "acc": 0.27106154152559253, + "acc_stderr": 0.004435993492583859, + "acc_norm": 0.2753435570603465, + "acc_norm_stderr": 0.004457743287380272 + }, + "rte": { + "acc": 0.5126353790613718, + "acc_stderr": 0.030086851767188564 + }, + "winogrande": { + "acc": 0.5153906866614049, + "acc_stderr": 0.014045826789783672 + }, + "storycloze_2016": { + "acc": 0.5558524853019775, + "acc_stderr": 0.011490067784518675 + }, + "boolq": { + "acc": 0.6076452599388379, + "acc_stderr": 0.008539983838167725 + }, + "arc_easy": { + "acc": 0.37584175084175087, + "acc_stderr": 0.009938436373170623, + "acc_norm": 0.34974747474747475, + "acc_norm_stderr": 0.009785578618940735 + }, + "arc_challenge": { + "acc": 0.1680887372013652, + "acc_stderr": 0.010927715046124858, + "acc_norm": 0.21075085324232082, + "acc_norm_stderr": 0.011918271754852197 + }, + "sciq": { + "acc": 0.63, + "acc_stderr": 0.015275252316519362, + "acc_norm": 0.561, + "acc_norm_stderr": 0.015701131345400774 + }, + "piqa": { + "acc": 0.6126224156692056, + "acc_stderr": 0.01136603808343591, + "acc_norm": 0.6055495103373232, + "acc_norm_stderr": 0.011402931101558378 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/rankeval/83m20b1b5_1.csv b/83m20b1b5/evaluation/rankeval/83m20b1b5_1.csv new file mode 100644 index 0000000000000000000000000000000000000000..f2e86c85c1f1369ac32c1eace4a7e4ee29a97fae --- /dev/null +++ b/83m20b1b5/evaluation/rankeval/83m20b1b5_1.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.324,0.01480686473373886,0 +anli_r2,acc,0.32,0.014758652303574886,0 +anli_r3,acc,0.3275,0.013553211167251954,0 +arc_challenge,acc,0.181740614334471,0.011269198948880236,0 +arc_challenge,acc_norm,0.21416382252559726,0.01198838320596649,0 +arc_easy,acc,0.36153198653198654,0.009858506543162067,0 +arc_easy,acc_norm,0.33964646464646464,0.009717845628687468,0 +boolq,acc,0.5706422018348624,0.008657333755353673,1 +cb,acc,0.4642857142857143,0.06724777654937658,1 +cb,f1,0.33042846768336964,,1 +copa,acc,0.57,0.049756985195624284,0 +hellaswag,acc,0.2709619597689703,0.0044354815159093975,0 +hellaswag,acc_norm,0.2735510854411472,0.004448701611795092,0 +piqa,acc,0.6060935799782372,0.011400182224698895,0 +piqa,acc_norm,0.5979325353645266,0.01143986712726753,0 +rte,acc,0.5306859205776173,0.03003973059219781,0 +sciq,acc,0.628,0.015292149942040577,0 +sciq,acc_norm,0.567,0.01567663091218133,0 +storycloze_2016,acc,0.5505077498663816,0.01150328869979918,0 +winogrande,acc,0.5027624309392266,0.014052271211616452,0 diff --git a/83m20b1b5/evaluation/rankeval/83m20b1b5_1.json b/83m20b1b5/evaluation/rankeval/83m20b1b5_1.json new file mode 100644 index 0000000000000000000000000000000000000000..0ebd1a0f04f9b658348c23d9b619f12d681ed4a4 --- /dev/null +++ b/83m20b1b5/evaluation/rankeval/83m20b1b5_1.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.324, + "acc_stderr": 0.01480686473373886 + }, + "anli_r2": { + "acc": 0.32, + "acc_stderr": 0.014758652303574886 + }, + "anli_r3": { + "acc": 0.3275, + "acc_stderr": 0.013553211167251954 + }, + "cb": { + "acc": 0.4642857142857143, + "acc_stderr": 0.06724777654937658, + "f1": 0.33042846768336964 + }, + "copa": { + "acc": 0.57, + "acc_stderr": 0.049756985195624284 + }, + "hellaswag": { + "acc": 0.2709619597689703, + "acc_stderr": 0.0044354815159093975, + "acc_norm": 0.2735510854411472, + "acc_norm_stderr": 0.004448701611795092 + }, + "rte": { + "acc": 0.5306859205776173, + "acc_stderr": 0.03003973059219781 + }, + "winogrande": { + "acc": 0.5027624309392266, + "acc_stderr": 0.014052271211616452 + }, + "storycloze_2016": { + "acc": 0.5505077498663816, + "acc_stderr": 0.01150328869979918 + }, + "boolq": { + "acc": 0.5706422018348624, + "acc_stderr": 0.008657333755353673 + }, + "arc_easy": { + "acc": 0.36153198653198654, + "acc_stderr": 0.009858506543162067, + "acc_norm": 0.33964646464646464, + "acc_norm_stderr": 0.009717845628687468 + }, + "arc_challenge": { + "acc": 0.181740614334471, + "acc_stderr": 0.011269198948880236, + "acc_norm": 0.21416382252559726, + "acc_norm_stderr": 0.01198838320596649 + }, + "sciq": { + "acc": 0.628, + "acc_stderr": 0.015292149942040577, + "acc_norm": 0.567, + "acc_norm_stderr": 0.01567663091218133 + }, + "piqa": { + "acc": 0.6060935799782372, + "acc_stderr": 0.011400182224698895, + "acc_norm": 0.5979325353645266, + "acc_norm_stderr": 0.01143986712726753 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/rankeval/83m20b1b5_2.csv b/83m20b1b5/evaluation/rankeval/83m20b1b5_2.csv new file mode 100644 index 0000000000000000000000000000000000000000..69a4a6e43d4cb03ca3373b107392771aac7b6a43 --- /dev/null +++ b/83m20b1b5/evaluation/rankeval/83m20b1b5_2.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.303,0.014539683710535264,0 +anli_r2,acc,0.33,0.01487687202745673,0 +anli_r3,acc,0.3325,0.013605417345710526,0 +arc_challenge,acc,0.1697952218430034,0.010971775157784204,0 +arc_challenge,acc_norm,0.21245733788395904,0.011953482906582947,0 +arc_easy,acc,0.35269360269360267,0.00980442059937866,0 +arc_easy,acc_norm,0.3425925925925926,0.009738105469984184,0 +boolq,acc,0.5963302752293578,0.008581220435616826,1 +cb,acc,0.39285714285714285,0.0658538889806635,1 +cb,f1,0.27357609710550884,,1 +copa,acc,0.57,0.049756985195624284,0 +hellaswag,acc,0.26867157936666003,0.004423628080052017,0 +hellaswag,acc_norm,0.27215694084843656,0.004441606665787932,0 +piqa,acc,0.6033732317736671,0.011413778810510455,0 +piqa,acc_norm,0.5957562568008705,0.011449891763007465,0 +rte,acc,0.48375451263537905,0.030080573208738064,0 +sciq,acc,0.621,0.01534909100222535,0 +sciq,acc_norm,0.556,0.01571976816340209,0 +storycloze_2016,acc,0.5467664350614645,0.011511744771088354,0 +winogrande,acc,0.5059194948697711,0.01405150083848581,0 diff --git a/83m20b1b5/evaluation/rankeval/83m20b1b5_2.json b/83m20b1b5/evaluation/rankeval/83m20b1b5_2.json new file mode 100644 index 0000000000000000000000000000000000000000..aa9f6c03dceb97c73ab51c79e5c1c29a41c2ce9a --- /dev/null +++ b/83m20b1b5/evaluation/rankeval/83m20b1b5_2.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.303, + "acc_stderr": 0.014539683710535264 + }, + "anli_r2": { + "acc": 0.33, + "acc_stderr": 0.01487687202745673 + }, + "anli_r3": { + "acc": 0.3325, + "acc_stderr": 0.013605417345710526 + }, + "cb": { + "acc": 0.39285714285714285, + "acc_stderr": 0.0658538889806635, + "f1": 0.27357609710550884 + }, + "copa": { + "acc": 0.57, + "acc_stderr": 0.049756985195624284 + }, + "hellaswag": { + "acc": 0.26867157936666003, + "acc_stderr": 0.004423628080052017, + "acc_norm": 0.27215694084843656, + "acc_norm_stderr": 0.004441606665787932 + }, + "rte": { + "acc": 0.48375451263537905, + "acc_stderr": 0.030080573208738064 + }, + "winogrande": { + "acc": 0.5059194948697711, + "acc_stderr": 0.01405150083848581 + }, + "storycloze_2016": { + "acc": 0.5467664350614645, + "acc_stderr": 0.011511744771088354 + }, + "boolq": { + "acc": 0.5963302752293578, + "acc_stderr": 0.008581220435616826 + }, + "arc_easy": { + "acc": 0.35269360269360267, + "acc_stderr": 0.00980442059937866, + "acc_norm": 0.3425925925925926, + "acc_norm_stderr": 0.009738105469984184 + }, + "arc_challenge": { + "acc": 0.1697952218430034, + "acc_stderr": 0.010971775157784204, + "acc_norm": 0.21245733788395904, + "acc_norm_stderr": 0.011953482906582947 + }, + "sciq": { + "acc": 0.621, + "acc_stderr": 0.01534909100222535, + "acc_norm": 0.556, + "acc_norm_stderr": 0.01571976816340209 + }, + "piqa": { + "acc": 0.6033732317736671, + "acc_stderr": 0.011413778810510455, + "acc_norm": 0.5957562568008705, + "acc_norm_stderr": 0.011449891763007465 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/rankeval/83m20b1b5_3.csv b/83m20b1b5/evaluation/rankeval/83m20b1b5_3.csv new file mode 100644 index 0000000000000000000000000000000000000000..1425c960648ac8ea717fe0e787d83465f81adaad --- /dev/null +++ b/83m20b1b5/evaluation/rankeval/83m20b1b5_3.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.322,0.01478291360099669,0 +anli_r2,acc,0.356,0.015149042659306626,0 +anli_r3,acc,0.3358333333333333,0.013639261190932884,0 +arc_challenge,acc,0.17747440273037543,0.011165138769643946,0 +arc_challenge,acc_norm,0.21075085324232082,0.011918271754852184,0 +arc_easy,acc,0.34175084175084175,0.009732359564894589,0 +arc_easy,acc_norm,0.3341750841750842,0.009679106032919063,0 +boolq,acc,0.6018348623853211,0.008561755594317447,1 +cb,acc,0.35714285714285715,0.0646095738380922,1 +cb,f1,0.24904214559386975,,1 +copa,acc,0.6,0.049236596391733084,0 +hellaswag,acc,0.27016530571599284,0.004431375549911369,0 +hellaswag,acc_norm,0.2719577773351922,0.004440588618232718,0 +piqa,acc,0.6055495103373232,0.011402931101558383,0 +piqa,acc_norm,0.5963003264417845,0.011447407541749086,0 +rte,acc,0.4548736462093863,0.029973636495415252,0 +sciq,acc,0.614,0.015402637476784376,0 +sciq,acc_norm,0.569,0.015667944488173508,0 +storycloze_2016,acc,0.5483698556921432,0.01150820114592835,0 +winogrande,acc,0.5067087608524072,0.014051220692330349,0 diff --git a/83m20b1b5/evaluation/rankeval/83m20b1b5_3.json b/83m20b1b5/evaluation/rankeval/83m20b1b5_3.json new file mode 100644 index 0000000000000000000000000000000000000000..06f7b53eeeef089901c311f23115ce5486242ae4 --- /dev/null +++ b/83m20b1b5/evaluation/rankeval/83m20b1b5_3.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.322, + "acc_stderr": 0.01478291360099669 + }, + "anli_r2": { + "acc": 0.356, + "acc_stderr": 0.015149042659306626 + }, + "anli_r3": { + "acc": 0.3358333333333333, + "acc_stderr": 0.013639261190932884 + }, + "cb": { + "acc": 0.35714285714285715, + "acc_stderr": 0.0646095738380922, + "f1": 0.24904214559386975 + }, + "copa": { + "acc": 0.6, + "acc_stderr": 0.049236596391733084 + }, + "hellaswag": { + "acc": 0.27016530571599284, + "acc_stderr": 0.004431375549911369, + "acc_norm": 0.2719577773351922, + "acc_norm_stderr": 0.004440588618232718 + }, + "rte": { + "acc": 0.4548736462093863, + "acc_stderr": 0.029973636495415252 + }, + "winogrande": { + "acc": 0.5067087608524072, + "acc_stderr": 0.014051220692330349 + }, + "storycloze_2016": { + "acc": 0.5483698556921432, + "acc_stderr": 0.01150820114592835 + }, + "boolq": { + "acc": 0.6018348623853211, + "acc_stderr": 0.008561755594317447 + }, + "arc_easy": { + "acc": 0.34175084175084175, + "acc_stderr": 0.009732359564894589, + "acc_norm": 0.3341750841750842, + "acc_norm_stderr": 0.009679106032919063 + }, + "arc_challenge": { + "acc": 0.17747440273037543, + "acc_stderr": 0.011165138769643946, + "acc_norm": 0.21075085324232082, + "acc_norm_stderr": 0.011918271754852184 + }, + "sciq": { + "acc": 0.614, + "acc_stderr": 0.015402637476784376, + "acc_norm": 0.569, + "acc_norm_stderr": 0.015667944488173508 + }, + "piqa": { + "acc": 0.6055495103373232, + "acc_stderr": 0.011402931101558383, + "acc_norm": 0.5963003264417845, + "acc_norm_stderr": 0.011447407541749086 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/rankeval/83m20b1b5_4.csv b/83m20b1b5/evaluation/rankeval/83m20b1b5_4.csv new file mode 100644 index 0000000000000000000000000000000000000000..96d62292d04d37fcf74c1478c45048977936c8b0 --- /dev/null +++ b/83m20b1b5/evaluation/rankeval/83m20b1b5_4.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.343,0.015019206922356951,0 +anli_r2,acc,0.319,0.01474640486547349,0 +anli_r3,acc,0.32916666666666666,0.013570806258433621,0 +arc_challenge,acc,0.1757679180887372,0.011122850863120485,0 +arc_challenge,acc_norm,0.2158703071672355,0.012022975360030663,0 +arc_easy,acc,0.3468013468013468,0.009766326091716005,0 +arc_easy,acc_norm,0.3463804713804714,0.009763542075695734,0 +boolq,acc,0.5996941896024465,0.008569459037299691,1 +cb,acc,0.375,0.06527912098338669,1 +cb,f1,0.2587134502923976,,1 +copa,acc,0.54,0.05009082659620332,0 +hellaswag,acc,0.270264887472615,0.004431889783633827,0 +hellaswag,acc_norm,0.2722565226050587,0.004442115268580945,0 +piqa,acc,0.6093579978237215,0.011383377760053584,0 +piqa,acc_norm,0.5979325353645266,0.011439867127267531,0 +rte,acc,0.4620938628158845,0.030009848912529117,0 +sciq,acc,0.625,0.015316971293620996,0 +sciq,acc_norm,0.568,0.015672320237336206,0 +storycloze_2016,acc,0.5521111704970604,0.011499463505491372,0 +winogrande,acc,0.500394632991318,0.014052481306049512,0 diff --git a/83m20b1b5/evaluation/rankeval/83m20b1b5_4.json b/83m20b1b5/evaluation/rankeval/83m20b1b5_4.json new file mode 100644 index 0000000000000000000000000000000000000000..679e1b0a5509f0af8b1f5f38dd4627230067dc47 --- /dev/null +++ b/83m20b1b5/evaluation/rankeval/83m20b1b5_4.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.343, + "acc_stderr": 0.015019206922356951 + }, + "anli_r2": { + "acc": 0.319, + "acc_stderr": 0.01474640486547349 + }, + "anli_r3": { + "acc": 0.32916666666666666, + "acc_stderr": 0.013570806258433621 + }, + "cb": { + "acc": 0.375, + "acc_stderr": 0.06527912098338669, + "f1": 0.2587134502923976 + }, + "copa": { + "acc": 0.54, + "acc_stderr": 0.05009082659620332 + }, + "hellaswag": { + "acc": 0.270264887472615, + "acc_stderr": 0.004431889783633827, + "acc_norm": 0.2722565226050587, + "acc_norm_stderr": 0.004442115268580945 + }, + "rte": { + "acc": 0.4620938628158845, + "acc_stderr": 0.030009848912529117 + }, + "winogrande": { + "acc": 0.500394632991318, + "acc_stderr": 0.014052481306049512 + }, + "storycloze_2016": { + "acc": 0.5521111704970604, + "acc_stderr": 0.011499463505491372 + }, + "boolq": { + "acc": 0.5996941896024465, + "acc_stderr": 0.008569459037299691 + }, + "arc_easy": { + "acc": 0.3468013468013468, + "acc_stderr": 0.009766326091716005, + "acc_norm": 0.3463804713804714, + "acc_norm_stderr": 0.009763542075695734 + }, + "arc_challenge": { + "acc": 0.1757679180887372, + "acc_stderr": 0.011122850863120485, + "acc_norm": 0.2158703071672355, + "acc_norm_stderr": 0.012022975360030663 + }, + "sciq": { + "acc": 0.625, + "acc_stderr": 0.015316971293620996, + "acc_norm": 0.568, + "acc_norm_stderr": 0.015672320237336206 + }, + "piqa": { + "acc": 0.6093579978237215, + "acc_stderr": 0.011383377760053584, + "acc_norm": 0.5979325353645266, + "acc_norm_stderr": 0.011439867127267531 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/83m20b1b5/evaluation/rankeval/83m20b1b5_5.csv b/83m20b1b5/evaluation/rankeval/83m20b1b5_5.csv new file mode 100644 index 0000000000000000000000000000000000000000..7095cc120572fe07bc1b1fd1fa42db64c772feca --- /dev/null +++ b/83m20b1b5/evaluation/rankeval/83m20b1b5_5.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.335,0.014933117490932572,0 +anli_r2,acc,0.332,0.014899597242811473,0 +anli_r3,acc,0.35583333333333333,0.013826518748493319,0 +arc_challenge,acc,0.181740614334471,0.011269198948880234,0 +arc_challenge,acc_norm,0.2150170648464164,0.01200571763413361,0 +arc_easy,acc,0.35185185185185186,0.009799078929868713,0 +arc_easy,acc_norm,0.33754208754208753,0.009703117820790303,0 +boolq,acc,0.6012232415902141,0.008563973987729909,1 +cb,acc,0.44642857142857145,0.06703189227942397,1 +cb,f1,0.3114219114219114,,1 +copa,acc,0.55,0.04999999999999998,0 +hellaswag,acc,0.2709619597689703,0.0044354815159093975,0 +hellaswag,acc_norm,0.27215694084843656,0.00444160666578793,0 +piqa,acc,0.6055495103373232,0.011402931101558383,0 +piqa,acc_norm,0.5897714907508161,0.01147625603635911,0 +rte,acc,0.5090252707581228,0.030091559826331334,0 +sciq,acc,0.62,0.015356947477797582,0 +sciq,acc_norm,0.588,0.015572363292015095,0 +storycloze_2016,acc,0.5510422234099412,0.011502027057558886,0 +winogrande,acc,0.4964483030781373,0.014052131146915867,0 diff --git a/83m20b1b5/evaluation/rankeval/83m20b1b5_5.json b/83m20b1b5/evaluation/rankeval/83m20b1b5_5.json new file mode 100644 index 0000000000000000000000000000000000000000..3d2dd7363a4f8d4610d14e6ccf3505743c86130e --- /dev/null +++ b/83m20b1b5/evaluation/rankeval/83m20b1b5_5.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.335, + "acc_stderr": 0.014933117490932572 + }, + "anli_r2": { + "acc": 0.332, + "acc_stderr": 0.014899597242811473 + }, + "anli_r3": { + "acc": 0.35583333333333333, + "acc_stderr": 0.013826518748493319 + }, + "cb": { + "acc": 0.44642857142857145, + "acc_stderr": 0.06703189227942397, + "f1": 0.3114219114219114 + }, + "copa": { + "acc": 0.55, + "acc_stderr": 0.04999999999999998 + }, + "hellaswag": { + "acc": 0.2709619597689703, + "acc_stderr": 0.0044354815159093975, + "acc_norm": 0.27215694084843656, + "acc_norm_stderr": 0.00444160666578793 + }, + "rte": { + "acc": 0.5090252707581228, + "acc_stderr": 0.030091559826331334 + }, + "winogrande": { + "acc": 0.4964483030781373, + "acc_stderr": 0.014052131146915867 + }, + "storycloze_2016": { + "acc": 0.5510422234099412, + "acc_stderr": 0.011502027057558886 + }, + "boolq": { + "acc": 0.6012232415902141, + "acc_stderr": 0.008563973987729909 + }, + "arc_easy": { + "acc": 0.35185185185185186, + "acc_stderr": 0.009799078929868713, + "acc_norm": 0.33754208754208753, + "acc_norm_stderr": 0.009703117820790303 + }, + "arc_challenge": { + "acc": 0.181740614334471, + "acc_stderr": 0.011269198948880234, + "acc_norm": 0.2150170648464164, + "acc_norm_stderr": 0.01200571763413361 + }, + "sciq": { + "acc": 0.62, + "acc_stderr": 0.015356947477797582, + "acc_norm": 0.588, + "acc_norm_stderr": 0.015572363292015095 + }, + "piqa": { + "acc": 0.6055495103373232, + "acc_stderr": 0.011402931101558383, + "acc_norm": 0.5897714907508161, + "acc_norm_stderr": 0.01147625603635911 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5dbd060aca217a8889b22b5de97a28d66867d24 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce473a5f97f8ea6f3a911c7b5a32117d4b6fb4979b7667c079fdbf325b0deda6 +size 15518743 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c172405c88541289a2399a3a1d809c6ba425560 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:467a8add1ac1c9d040378d49dfe5aa4ea7ce9b3ad2cba2097738170a3bfb0ec9 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..46a01c9188161da8b62f440f650f897dbab17f2d --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a66fa64b258c4d367e825146823413db320a587657b8138a548b67e9b87d63bc +size 15518626 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..60d7631bddd10bbf843b6c910c291828ce4332e9 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2de725a3a544cca2a65e1de189c64715412f0306394ed7ab1b4b5400d0a18f74 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e477b7928ceedcd40486817d69849e6462ff7d8 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d92079572bea6cf5e36ef803f65f0aed2fded88cc2c385af38be41ced1fa7f0 +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5033260892d7b782e3fbca74fe63dc2258355278 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503f617d2f4c40ebf4765411cd23e29fd2667bc7c6eda51df568043637d4a651 +size 15518626 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..da62ba75cccee1ddb38ab02ba02c8cc87940824e --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:673f62c97460e07934c6ff62d17e87da0326069436f6165e53f933bba8ea8497 +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f97917c8f4d95429871257037dfbc932e4cbefb7 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b821f108a8351973b7ba1e3a9657a94ddb6d138a8b2ffcc2324a87cf8a9a8341 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3dc7f73985405713f4799fc382fe41f946bde02 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f1801444423f85677a08a7e583bdcb33f59a3fa4596ed2e742ff234569c13a8 +size 15518626 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc2608e435ff7f34fdf5c8e8262f10084ae05bf5 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f06593e9683037a35d135fd4f3d400451de2801f6b3cb9816ca4aa438a7b9da6 +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c725e2d099e7021eca8e73dc4aecfca75449295 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bbe6bde8bda9bbabec752eed429b6438dec4b0c90dbdbe1e7d8071152c75a37 +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..299e9897e94716ff55f2afa91efa82b0ca667254 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac5c690c708e58e0d0d3bce71209adba5a01d8c41936d75481af56f1285bc07c +size 15518615 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2745fe6661239a632f1d1837230440689fa323a1 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e779ef2f6f70968ae363ab241f4feec1c861896cd331084bd85ff7dca60d01 +size 15518626 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ed219bc00f8a77cd33d40e42464f0ccf0d00cf4 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d534f2de2cc90a4f8af0ae78d675df617e1f7f916e8c7b6269e8101aea19fd62 +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d64767516340a975fde4ce11ad823e6cdc31e1e --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90243a261ada2c9808634761a0730deacf24ce7543e29faa8794df017ff2911 +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7bc29e31a9002852455395fb05db60d17a54f214 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:631539695cc0468f570271deaad932888cd3c4407a2391830b6f9de5999bdda2 +size 15518626 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac1da8e619a1c94a7f1c6469a3ee323b189f122e --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e48373921caf9b262319b95f286a857fdd2e4595f381a0ced8535caafbcf0ca +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..217dd2e9447877ced405d7d305e898ac024873e7 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:730c9a235980881f29cb94f5335b79804c6c0ad9c143b0c3294b539837dec46c +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec54e1a17c03b4fffdd97f6bf58e5297ecc0b98b --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14f5fe7c3976af9c528f95737821953f7b745cfed30fea8e912970f145abe5f2 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4513034da58736437d91fd099a048dedf5a8b9bc --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8b1939e546ed2e13ddedbdb2d616c802a49cd68b730472c417eecceecb13b15 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a684c520ad2be0163f8006f7fedcb382436e792 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:395ff50846160f9e6b88aef6428e1341fbddd104f807e90ba8d35c12cffa8f98 +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..05934004afcebda51dc806b08b252161c87441d2 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e7358a2d01e4b0b9abdd9dd86abed8413117e1337accbcd751b2a5828d21201 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa4b7919c9173d1925b2e362ac4e45cdb12ec3d2 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa2858904103f1e02f1aa7896457936ae6333da815919cece5bc3ef86b0cd3bf +size 15518743 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8635374015a938f55a360a40b8b93278a3b8bcec --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:886eb48711e3d3a19d63d25694ca9f7d8eb50a4a9b08c98fba0084945cf8b006 +size 15518626 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..47bc4107e7fe55128085151ad76b707d0ccc78e1 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3a1f7b6fa9b1b4467383b9252a8d2859db6f1366cf43584dc4f8eb4660dcab1 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c0cd1cf3a011d8ee5b7f6f4446d959217c8dd5b --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0666411c3a8fdc66db4b6387781e6b8f6196194379e10dd2ed3a5007bb19792f +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..97ae1ee6d7b9f1917e9c732db61291fdb595c3c6 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6488cd1326461ebec30782c335903460633f5d47ff6fd187dc0b6aa319ec5e4 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..70890fecda6863a83acf764c7df940fbaf65babf --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c28b9939cad20f422007000223cdd20ca224d1e6fc7d3833c6630ed9538808bc +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9331b65026ddfeb9bf4eb6b272f8f7a1d40c1eb9 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92ab0fd4abeae8a7373f864ff055a19e4176773f14742c9c377ae09ed8565c18 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b57bcdbea8fcf29ff662a634d5698ed609f66f4d --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db8f2eba1fe71d1448bd681cf95425d5931a28e23f4ff97508f80ba76ae64cf +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fce0e17392f41ce99baf5c91b9a1e2c58b74cf51 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96aac45b9328e2803bb745132cdeaad20214afec4adb1ea3af2332a4475342d6 +size 15518818 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..42e01c835b5bff09a67f9c3ad7081978c42b0c71 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:147408fe37aaacbeb4110e75a350a2eeb22f39eca1cafb03c720c0ab7a7726e9 +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3fe8603eb6f9a9771d65057fcc9152acdcb289d --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:033332ad27daf09172d1b86d2b04073767b7a0bde252cdb0fd8cf7c63c969e97 +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5139a5ab9da2e22bf1c35642b7d068f0466d7ae --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4f9619fa2fd9a09d06fb90303b33ce9fef12deadc023afde9be7a188b438a9c +size 15518743 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd1902a4c6122ccd4b597316d68b7ea9707783be --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38ee9b7a06dbdc53ae1d40dcc51d1edc69e1ce283034d5953d595dc77be2f949 +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b21b5bc8f461191aa55afb026db46889722def7f --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07be7e73f374ec0d82281e6946931af074dbb76fb0e0e319291bd1551dc0c3a3 +size 15518818 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f12a89f23d77828c2fb1f68b5db8bbde22c0d56 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f728428f76819e8043c218f372cf889428d7448fd0171aaa6390bffd079714d3 +size 15518626 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a31741713ad425bf3006a0f09fe19a7949ba9dc --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d56aea8103cb161a24a5d4b79eb1f768cff23e96379c335abb7b11fff527b993 +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f35f870befa8862b7cd7898eba4215499bba990 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30c89605e91a4255fd18ede08f46f323e1c3bdc8111ea44763591b4aad531dbc +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc9682415550a91c00cf152eec30c8191fda5ebe --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9dbd120490893122e4a652eda50abe4054ecd48f24e5e1f8aca23249475418b +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..85b3e5e81ab5f457fdb27ebbeeb5ff585d489279 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eab4636922dda1c2548e0f7b55d1026ee1f1b1b544f2b1f386c3445a7aec95b +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ffde0dbe33e6cf8fe06691f114524ed05e3d9cf --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93a87911f3067721a7264fd3ef61f99b45b011ba98581e72749bb5f970e76c36 +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..377a381dbad552513000d098ba0a1258ce6a0720 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:495ffcd594427c92f29f12ad960d5348f527efc75c15daa7e111b21e3b725ae4 +size 15518562 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f816d0be6ad8fa5e42c9b84e0bbda5bfa3f49067 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdab1323a5574ee1d802fe7e10dfb062c26c3afb37d57b1ad3a085fc8db20beb +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c8b5cae40be8917bf0b286a2ac6e37de844d4aa --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed7f7279683900e30ab19a4ca71ab46cd63beb8d37a6f70018bed3efbe0bc29 +size 15518615 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e125ce63ff231990d91d4c174a013941ae1d2504 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:698012684f5574221448ba3c598af1d884ce55dfbd31bfb540d4b216fc76478f +size 15518818 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f052e8f414c237fd8f697dcfda67a3f4fdc5a56b --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a29fab1244ec31254b23d47932ddcf84e4e70ced649f55ffb50e7d94f69860c4 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..84718e4178f369157dcd3f55619d53aee5cb856b --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e351b6a6160b6a37763ee0432539bb2a389e2048ce7fcb7bf288b8d6599daed +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9adc80d576159c26f945e52bd546bd33bfa0b60a --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d88924bd51d7eb3458f77321f40ed81ad7b48cdcfafecfc3c6cbd84f5c1ffca +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fc0a30b4500521ea04264af8ebb970daf84a377 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47feb337b27d52955607b044ccbebe1c6105b63a15c1137b70e0664674fef835 +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb854515e49cf7649c6e4aac6a59f7e694e68559 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26bb35da3554515f66985451fe53ae3567737ec69b5a8c96088a04aaa1bc0054 +size 15518626 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..162d67aa2c2741430b5df33ae9e0a5766cf5da63 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19d8e39f539e3a93bfa9b6b8a96b38a4d86ce90bdbc58b06d7399b61bd9dbb8d +size 15518818 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d276bef086c790401fd798945d11421647ad71d7 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78909c756ea4dd3fdb258484182d8744139b9b5a991f8714dff0bc66a29b5262 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b8c9ca3b1223181dc44cc5d2e09ed8dfa44185d --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:550d5dd2806e78a488cc86c065f128c2a0f7687b619b31e595caa67b7fd069a2 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3cc3e6c7711ed1a37579ad99cca00b3421b62767 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0814bd9496b3e48d45e5aadb258504c0f7dd754a803c48c3a8483c162e9ba78 +size 15518754 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd44f132431b86b5d4c36cda897bd081d31dafc6 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d9731a4c75795ed1a020af789a71ba4de99fb6f8cb3b774d88251636c4a521c +size 15518679 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2eff47df9d131b0fd14aaf060492bdfc5fafc060 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ead48167b6aaa1bfc57b4011194483e60c7010e2904145b3b7d2b0a44b42bce +size 15518818 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f007b3e3ba3720b359f0314ca1fb08c023de6ac --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:398febd57b7345b3913ad70c41d06e70c311fafc9306092b114093ad104e21a4 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f044a7d03d207ac0f42cb717ba162e76deefdddb --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf44a0e3f5eb14822438cd3f0afabe6c9a85709d910d7b666df7a3300148b217 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8c28c51d6e98fa10ee4e8b2c8d8c9dbd2e24ed1 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19062f03a629a23906e0cf25fda4be44b5f5cba96a07c7d7c121e604d64f4364 +size 15518690 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2950e311c60ede0185b81b4a5a1399758ac4dc7 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb8398d392620afe4006dfa5c980c0db0a4771873eacca2e782f4e194bac2ccc +size 15518743 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..19f67d6594c16438f1b2b28a8d9d1acd90d115d7 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08f2e1613f38230c9c180e36d7ba65e2705ff7e366f06cce3c016ca0ddb83124 +size 15518679 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..93718db09509e7ba6299531b9da237338aefe2c2 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0ff062282f9549f2b661970cefefe99858cca7943945e252fb28b7102fa8068 +size 15518679 diff --git a/83m20b1b5/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/83m20b1b5/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e18b3e343b9b3afb33da0e2b331af3af5eb6641 --- /dev/null +++ b/83m20b1b5/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4df0003cf69c301a533c1815462af4df00c66f6d959aafe9d29102e2c75a34a +size 15518743 diff --git a/83m20b1b5/global_step37905/layer_01-model_00-model_states.pt b/83m20b1b5/global_step37905/layer_01-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..285b0c8d290c290fe89eed9a39ac92cd2fe26d48 --- /dev/null +++ b/83m20b1b5/global_step37905/layer_01-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac10e5ab37af881daaba216feb5870fbfe08954106ab0f7028eb2f2e047ad03d +size 67011843 diff --git a/83m20b1b5/global_step37905/layer_03-model_00-model_states.pt b/83m20b1b5/global_step37905/layer_03-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8c3b376351ef45af13f1fed5d2209dd8c31560f --- /dev/null +++ b/83m20b1b5/global_step37905/layer_03-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:869f468e9ef7d58ca5ae59c21ee32d620a62a2f2102bcc3dfe8e8e459ef5a3d5 +size 9851395 diff --git a/83m20b1b5/global_step37905/layer_04-model_00-model_states.pt b/83m20b1b5/global_step37905/layer_04-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..201b532d687f2fc3e0a19a654eda885fa550b0ef --- /dev/null +++ b/83m20b1b5/global_step37905/layer_04-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0ff9666bdeb4ccf6421e64951a5f544cf122393c8598e103bbe945db076b59d +size 9851395 diff --git a/83m20b1b5/global_step37905/layer_05-model_00-model_states.pt b/83m20b1b5/global_step37905/layer_05-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d473cbd7a4f22eab90726af457d97c714624b63 --- /dev/null +++ b/83m20b1b5/global_step37905/layer_05-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73f75c5427ca18a7654df1824366348c8f3238e7256468af4bd5378de0bbab9b +size 9851395 diff --git a/83m20b1b5/global_step37905/layer_06-model_00-model_states.pt b/83m20b1b5/global_step37905/layer_06-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3dcd902df4556f5feb559d1834a1e2aa87f0b005 --- /dev/null +++ b/83m20b1b5/global_step37905/layer_06-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae6a2f6974d9661a73106a861bbec68ddb44017d8e4bdac6601ef6181e447a7 +size 9851395 diff --git a/83m20b1b5/global_step37905/layer_07-model_00-model_states.pt b/83m20b1b5/global_step37905/layer_07-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3340e44b190e5f0b66daa499a93fbbdfd66d337b --- /dev/null +++ b/83m20b1b5/global_step37905/layer_07-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:567fe5903bb762bd44cd326ce53e47efd68071304209f6547e20416c77ea43ce +size 9851395 diff --git a/83m20b1b5/global_step37905/layer_08-model_00-model_states.pt b/83m20b1b5/global_step37905/layer_08-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5a904d5597fe75d62326bdc943c415a88fbd74f --- /dev/null +++ b/83m20b1b5/global_step37905/layer_08-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1412711347fe86b6046b8ed93adfab8664976a2199e4933609ef5c53110c26b3 +size 9851395 diff --git a/83m20b1b5/global_step37905/layer_09-model_00-model_states.pt b/83m20b1b5/global_step37905/layer_09-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2ef7b0137357d9ce4a59d5e8e08d69b20280ff0 --- /dev/null +++ b/83m20b1b5/global_step37905/layer_09-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ff84befabc0d80019cd103cc7161b2918a7dd8b90e325c2081199b97b421d8 +size 9851395 diff --git a/83m20b1b5/global_step37905/layer_10-model_00-model_states.pt b/83m20b1b5/global_step37905/layer_10-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7581d4323640cb66bd1fefe083fefb1fe0157ac --- /dev/null +++ b/83m20b1b5/global_step37905/layer_10-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:184e9d752443284ea67de279f6f2812c0a8b75eecf15255a8f9246c6a48903d8 +size 9851395 diff --git a/83m20b1b5/global_step37905/layer_11-model_00-model_states.pt b/83m20b1b5/global_step37905/layer_11-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5feb4c4c761dc0a25f17f997fe2c11ac91d7f6e2 --- /dev/null +++ b/83m20b1b5/global_step37905/layer_11-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dc6c76861710c2ef2fdeab8b886f4e27ee78f2f89bd479adb4be032b00e17cf +size 9851395 diff --git a/83m20b1b5/global_step37905/layer_12-model_00-model_states.pt b/83m20b1b5/global_step37905/layer_12-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..15275041f939059b694be201f89df11d357a6f78 --- /dev/null +++ b/83m20b1b5/global_step37905/layer_12-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69b8d0668a010d4222e4e532b2d864cf6c91af976ec0eea9606e408af643ff44 +size 9851395 diff --git a/83m20b1b5/global_step37905/layer_14-model_00-model_states.pt b/83m20b1b5/global_step37905/layer_14-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f769cbbbe1066f8e1fa0ff47563da2a160e01567 --- /dev/null +++ b/83m20b1b5/global_step37905/layer_14-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5d94fd331546466a585cd09c21c937815244c3d0eb294c1ba202f2c47e6b970 +size 3779 diff --git a/83m20b1b5/global_step37905/mp_rank_00_model_states.pt b/83m20b1b5/global_step37905/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..02a990e5e7b07230ebe0c018e3bba1c7b78d48cf --- /dev/null +++ b/83m20b1b5/global_step37905/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e8063af6d10ab34f608c839ea20066464c12cb8adea14ba29efdc63d5165e4a +size 31603 diff --git a/83m20b1b5/logs/2820865.err b/83m20b1b5/logs/2820865.err new file mode 100644 index 0000000000000000000000000000000000000000..8de833f4adb6fbdc271ea2e705b435f2f3a8f395 --- /dev/null +++ b/83m20b1b5/logs/2820865.err @@ -0,0 +1,1109 @@ +7: 2023-02-09 22:41:51.880107: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:41:51.880112: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:41:51.880106: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:41:51.880126: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:41:51.880119: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:41:51.880129: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:41:51.880133: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:41:51.880119: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:51.881771: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:51.881771: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:51.881764: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:51.881770: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:51.881779: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:51.881780: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:51.881778: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:51.881766: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:51.882987: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:51.882996: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:51.882993: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:51.882997: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:51.882999: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:51.882991: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:51.882991: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:51.882986: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:51.883089: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:51.883104: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:51.883117: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:51.883118: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:51.883122: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:51.883113: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:51.883110: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:51.883104: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:51.883292: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:51.883299: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:51.883300: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:51.883302: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:51.883290: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:51.883288: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:51.883293: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:51.883312: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:51.885756: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:51.885759: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:51.885762: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:51.885766: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:51.885754: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:51.885766: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:51.885773: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:51.885760: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:51.888060: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:51.888070: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:51.888075: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:51.888078: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:51.888058: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:51.888084: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:51.888088: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:51.888129: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:51.891037: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:51.891029: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:51.891040: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:51.891044: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:51.891044: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:51.891053: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:51.891048: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:51.891046: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:53.411268: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:53.411264: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:53.411278: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:53.411270: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:53.411270: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:53.411279: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:41:53.411532: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-09 22:41:53.411277: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:53.411284: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-09 22:41:53.411540: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:53.411676: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:53.411535: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-09 22:41:53.411678: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:41:53.411681: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:41:53.411682: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:41:53.411684: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:41:53.411685: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:53.411685: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:41:53.411687: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:53.411533: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:41:53.411540: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:41:53.411546: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:41:53.411545: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:41:53.411541: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:41:53.411928: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:53.411931: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:53.411941: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:53.411941: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:53.411944: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:53.411945: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:53.411950: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:53.411950: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:53.438264: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:53.438259: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:53.438264: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:53.438276: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:53.438269: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:53.438277: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:53.438268: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:53.438272: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:53.438660: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:53.438659: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:53.438665: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:53.438664: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:53.438669: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:53.438668: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:53.438673: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:53.438676: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:53.489789: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:53.489831: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-09 22:41:53.489787: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:53.489836: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-09 22:41:53.489787: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:53.489836: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-09 22:41:53.489787: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:53.489838: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-09 22:41:53.489791: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:53.489837: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-09 22:41:53.489798: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:53.489837: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-09 22:41:53.489801: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:53.489838: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-09 22:41:53.489791: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:53.489836: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-09 22:41:53.490172: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:53.490175: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:41:53.490179: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:53.490183: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:53.490184: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:53.490275: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:53.490277: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:53.490186: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:53.490188: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:53.490190: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:53.490280: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:53.490280: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:53.490281: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:53.490284: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:53.490285: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:53.490285: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:53.629113: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:53.629119: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:53.629162: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:53.629142: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:53.629151: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:53.629162: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:53.629171: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:53.629167: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:53.629555: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:53.629560: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:53.629564: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:53.629586: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:53.629591: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:53.629600: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:53.629605: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:53.629606: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:53.634157: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:53.634165: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:53.634169: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:53.634169: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:53.634168: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:53.634356: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:53.634361: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:53.634363: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:53.634363: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:53.634365: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:53.634198: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:53.634204: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:53.634202: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:53.634396: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:53.634400: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:53.634411: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:53.637641: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:53.637643: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:53.637658: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:53.637650: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:53.637657: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:53.637663: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:53.637650: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:53.637650: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:53.638057: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:53.638060: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:53.638061: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:53.638062: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:53.638064: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:53.638066: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:53.638063: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:53.638067: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:42:03.058739: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.058774: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.058787: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.058802: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.058820: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.058838: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.058845: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.058899: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.062553: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.062581: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.062579: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.062594: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.062599: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.062610: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.062613: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.062617: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.063271: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.063277: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.063272: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.063351: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-02-09 22:42:03.063281: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.063280: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-09 22:42:03.063358: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.063285: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-09 22:42:03.063354: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.063286: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-09 22:42:03.063358: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.063283: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-09 22:42:03.063359: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.063361: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.063360: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.063363: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.064503: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.064513: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.064511: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.064521: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.064520: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.064529: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.064530: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.064534: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.068334: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.068337: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.068338: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.068341: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.068341: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.068340: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.068343: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.068342: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.068353: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:42:03.068355: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:42:03.068355: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:42:03.068359: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:42:03.068360: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:42:03.068361: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:42:03.068362: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:42:03.068364: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:42:03.069750: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.069757: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.069762: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.069764: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.069766: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.069768: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.069772: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.069770: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070083: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070087: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070094: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070092: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070094: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070093: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070099: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:42:03.070099: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:42:03.070100: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070109: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:42:03.070111: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:42:03.070112: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:42:03.070112: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:42:03.070117: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:42:03.070156: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070170: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.070699: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.070702: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.070705: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.070707: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.070709: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.070710: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.070712: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.070715: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.070715: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.070713: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.070719: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.070723: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.070725: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.070725: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.070727: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.070728: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.080277: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.080274: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.080282: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.080282: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.080283: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.080287: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.080288: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.080293: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.093349: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:42:03.093364: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.093353: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:42:03.093366: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.093354: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:42:03.093369: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.093357: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:42:03.093370: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.093355: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:42:03.093371: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.093357: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:42:03.093371: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.093364: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.093374: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.093379: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:42:03.093383: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:42:03.093360: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:42:03.093385: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:42:03.093389: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:42:03.093390: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.093369: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:42:03.093370: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:42:03.093391: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:42:03.093393: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:42:03.093373: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:42:03.093422: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-09 22:42:03.093365: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.093436: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.093376: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:42:03.093375: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:42:03.093379: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:42:03.093383: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.070386: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.070389: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.070391: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.070394: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.070402: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.070402: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.070399: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.070401: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.070411: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.070412: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.070418: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.070418: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.070442: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.070450: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.070458: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.070464: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.119255: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.119256: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.119257: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.119261: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.119261: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.119261: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.119277: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.119277: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.119278: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.119279: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.119280: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.119280: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.119342: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.119345: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.119362: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.119361: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.142966: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.143008: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.143015: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.143456: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.143458: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.143476: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.143478: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.143484: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.145459: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.145458: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.145458: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.145461: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.145463: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.145467: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.145464: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.145476: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.145483: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.145484: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.145486: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.145487: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.145486: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.145489: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.145505: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.145530: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: Loading extension module scaled_upper_triang_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module fused_mix_prec_layer_norm_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module fused_mix_prec_layer_norm_cuda... +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +1: Successfully preprocessed all matching files. +1: Successfully preprocessed all matching files. +1: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: +0: +0: +0: +0: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +2: +2: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: +3: +3: +3: +3: +3: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: +4: +4: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: +5: +5: +5: +5: +5: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: +6: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: +7: +7: +7: +7: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +7: Loading extension module utils... +6: Loading extension module utils... +7: Loading extension module utils... +6: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +1: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +7: +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +4: +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +6: +6: Loading extension module utils...Loading extension module utils... +6: +6: No modifications detected for re-loaded extension module utils, skipping build step... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +6: +6: Loading extension module utils... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +5: +5: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils...Loading extension module utils... +5: +5: +5: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +5: +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/utils.py:349: UserWarning: Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings +0: warnings.warn("Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings") diff --git a/83m20b1b5/logs/2820865.out b/83m20b1b5/logs/2820865.out new file mode 100644 index 0000000000000000000000000000000000000000..0a901de818bd59e35602ae4ae8cf735adbb2a430 --- /dev/null +++ b/83m20b1b5/logs/2820865.out @@ -0,0 +1,4372 @@ +Model parameters: d_model 640 ffw_size 2560 kv_size 64 n_heads 10 n_layers 10 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 1 --pipeline-model-parallel-size 1 --num-layers 10 --hidden-size 640 --num-attention-heads 10 --kv-channels 64 --ffn-hidden-size 2560 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 4 --global-batch-size 256 --train-samples 1 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --loss-scale 12 --clip-grad 1.0 --kill-switch-path kill-switch-83m20b1b5val --bf16 --checkpoint-activations --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 1 --lr-warmup-samples 0 --clip-grad 1.0 --weight-decay 1e-1 --no-load-optim --reset-progress --override-lr-scheduler --log-interval 10 --save-interval 1000 --eval-interval 1 --eval-iters 100 --eval-only true --tensorboard-dir tensorboard_83m20b1b5val --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save checkpoints_83m20b1b5 --load checkpoints_83m20b1b5 --train-weighted-split-paths-path train20b.txt --valid-weighted-split-paths-path val.txt --data-impl mmap --deepspeed --deepspeed_config ds_configs/2820865.json --zero-stage 0 +START 2820865: Thu 09 Feb 2023 10:41:25 PM EET +0: +0: +0: ======================= ROCm System Management Interface ======================= +0: ================================= Concise Info ================================= +0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +0: 0 45.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 1 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 2 42.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 4 40.0c 81.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 6 42.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: ================================================================================ +0: ============================= End of ROCm SMI Log ============================== +2: +2: +2: ======================= ROCm System Management Interface ======================= +2: ================================= Concise Info ================================= +2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +2: 0 42.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 1 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 2 42.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 4 42.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 6 37.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: ================================================================================ +2: ============================= End of ROCm SMI Log ============================== +4: +4: +4: ======================= ROCm System Management Interface ======================= +4: ================================= Concise Info ================================= +4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +4: 0 48.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 2 41.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 4 39.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 6 40.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: ================================================================================ +4: ============================= End of ROCm SMI Log ============================== +6: +6: +6: ======================= ROCm System Management Interface ======================= +6: ================================= Concise Info ================================= +6: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +6: 0 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 2 39.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 3 40.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 4 39.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 6 38.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: ================================================================================ +6: ============================= End of ROCm SMI Log ============================== +5: +5: +5: ======================= ROCm System Management Interface ======================= +5: ================================= Concise Info ================================= +5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +5: 0 49.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 2 46.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 4 43.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 6 40.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 7 38.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: ================================================================================ +5: ============================= End of ROCm SMI Log ============================== +3: +3: +3: ======================= ROCm System Management Interface ======================= +3: ================================= Concise Info ================================= +3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +3: 0 45.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 2 38.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 4 38.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 6 39.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: ================================================================================ +3: ============================= End of ROCm SMI Log ============================== +7: +7: +7: ======================= ROCm System Management Interface ======================= +7: ================================= Concise Info ================================= +7: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +7: 0 47.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 1 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 2 45.0c 76.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 4 40.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 5 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 6 40.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: ================================================================================ +7: ============================= End of ROCm SMI Log ============================== +1: +1: +1: ======================= ROCm System Management Interface ======================= +1: ================================= Concise Info ================================= +1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +1: 0 45.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 2 42.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 4 42.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 6 39.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: ================================================================================ +1: ============================= End of ROCm SMI Log ============================== +6: Launching on nid006831 (6/8), master nid006825 port 9999, GPUs 8, CUDA: True +3: Launching on nid006828 (3/8), master nid006825 port 9999, GPUs 8, CUDA: True +0: Launching on nid006825 (0/8), master nid006825 port 9999, GPUs 8, CUDA: True +1: Launching on nid006826 (1/8), master nid006825 port 9999, GPUs 8, CUDA: True +5: Launching on nid006830 (5/8), master nid006825 port 9999, GPUs 8, CUDA: True +2: Launching on nid006827 (2/8), master nid006825 port 9999, GPUs 8, CUDA: True +4: Launching on nid006829 (4/8), master nid006825 port 9999, GPUs 8, CUDA: True +7: Launching on nid006832 (7/8), master nid006825 port 9999, GPUs 8, CUDA: True +0: using world size: 64, data-parallel-size: 64, tensor-model-parallel size: 1, pipeline-model-parallel size: 1 +0: accumulate and all-reduce gradients in fp32 for bfloat16 data type. +0: using torch.bfloat16 for parameters ... +0: ------------------------ arguments ------------------------ +0: abort_on_unmet_fused_kernel_constraints ......... False +0: accumulate_allreduce_grads_in_fp32 .............. True +0: adam_beta1 ...................................... 0.9 +0: adam_beta2 ...................................... 0.999 +0: adam_eps ........................................ 1e-08 +0: adlr_autoresume ................................. False +0: adlr_autoresume_interval ........................ 1000 +0: apply_query_key_layer_scaling ................... True +0: apply_residual_connection_post_layernorm ........ False +0: attention_dropout ............................... 0.1 +0: attention_softmax_in_fp32 ....................... False +0: bert_binary_head ................................ True +0: bert_load ....................................... None +0: bf16 ............................................ True +0: bias_dropout_fusion ............................. True +0: bias_gelu_fusion ................................ True +0: biencoder_projection_dim ........................ 0 +0: biencoder_shared_query_context_model ............ False +0: block_data_path ................................. None +0: checkpoint_activations .......................... True +0: checkpoint_in_cpu ............................... False +0: checkpoint_num_layers ........................... 1 +0: clip_grad ....................................... 1.0 +0: codecarbon_dir .................................. None +0: consumed_train_samples .......................... 0 +0: consumed_train_tokens ........................... 0 +0: consumed_valid_samples .......................... 0 +0: contigious_checkpointing ........................ False +0: cpu_optimizer ................................... False +0: cpu_torch_adam .................................. False +0: curriculum_learning ............................. False +0: data_impl ....................................... mmap +0: data_parallel_size .............................. 64 +0: data_path ....................................... None +0: dataloader_type ................................. single +0: DDP_impl ........................................ local +0: decoder_seq_length .............................. None +0: deepscale ....................................... False +0: deepscale_config ................................ None +0: deepspeed ....................................... True +0: deepspeed_activation_checkpointing .............. False +0: deepspeed_config ................................ ds_configs/2820865.json +0: deepspeed_mpi ................................... False +0: distribute_checkpointed_activations ............. False +0: distributed_backend ............................. nccl +0: embed_layernorm ................................. False +0: embedding_path .................................. None +0: encoder_seq_length .............................. 2048 +0: eod_mask_loss ................................... False +0: eval_interval ................................... 1 +0: eval_iters ...................................... 100 +0: eval_only ....................................... True +0: evidence_data_path .............................. None +0: exit_duration_in_mins ........................... None +0: exit_interval ................................... None +0: ffn_hidden_size ................................. 2560 +0: finetune ........................................ False +0: fp16 ............................................ False +0: fp16_lm_cross_entropy ........................... False +0: fp32_residual_connection ........................ False +0: gigaflos_no_embeds .............................. 0 +0: global_batch_size ............................... 256 +0: glu_activation .................................. None +0: hidden_dropout .................................. 0.1 +0: hidden_size ..................................... 640 +0: hysteresis ...................................... 2 +0: ict_head_size ................................... None +0: ict_load ........................................ None +0: img_dim ......................................... 224 +0: indexer_batch_size .............................. 128 +0: indexer_log_interval ............................ 1000 +0: inference ....................................... False +0: init_method_std ................................. 0.02 +0: init_method_xavier_uniform ...................... False +0: initial_loss_scale .............................. 4294967296 +0: kill_switch_path ................................ kill-switch-83m20b1b5val +0: kv_channels ..................................... 64 +0: layer_norm_fusion ............................... True +0: layernorm_epsilon ............................... 1e-05 +0: lazy_mpu_init ................................... None +0: load ............................................ checkpoints_83m20b1b5 +0: local_rank ...................................... None +0: log_batch_size_to_tensorboard ................... True +0: log_interval .................................... 10 +0: log_learning_rate_to_tensorboard ................ True +0: log_level ....................................... None +0: log_level_replica ............................... None +0: log_loss_scale_to_tensorboard ................... True +0: log_num_zeros_in_grad ........................... False +0: log_params_norm ................................. False +0: log_path ........................................ None +0: log_timers_to_tensorboard ....................... True +0: log_validation_ppl_to_tensorboard ............... True +0: loss_on_targets_only ............................ False +0: loss_scale ...................................... 12.0 +0: loss_scale_window ............................... 1000 +0: lr .............................................. 0.0002 +0: lr_decay_iters .................................. None +0: lr_decay_samples ................................ 1 +0: lr_decay_style .................................. cosine +0: lr_decay_tokens ................................. None +0: lr_warmup_fraction .............................. None +0: lr_warmup_iters ................................. 0 +0: lr_warmup_samples ............................... 0 +0: make_vocab_size_divisible_by .................... 128 +0: mask_prob ....................................... 0.15 +0: masked_softmax_fusion ........................... True +0: max_position_embeddings ......................... 2048 +0: mean_noise_span_length .......................... None +0: memory_centric_tiled_linear ..................... False +0: merge_file ...................................... gpt2/merges.txt +0: micro_batch_size ................................ 4 +0: min_loss_scale .................................. 1.0 +0: min_lr .......................................... 2e-05 +0: mmap_warmup ..................................... False +0: no_load_optim ................................... True +0: no_load_rng ..................................... None +0: no_save_optim ................................... None +0: no_save_rng ..................................... None +0: noise_density ................................... None +0: num_attention_heads ............................. 10 +0: num_channels .................................... 3 +0: num_classes ..................................... 1000 +0: num_layers ...................................... 10 +0: num_layers_per_virtual_pipeline_stage ........... None +0: num_workers ..................................... 2 +0: onnx_safe ....................................... None +0: openai_gelu ..................................... False +0: optimizer ....................................... adam +0: optimizer_fusion ................................ True +0: override_lr_scheduler ........................... True +0: pad_vocab_size_to ............................... None +0: params_dtype .................................... torch.bfloat16 +0: partition_activations ........................... False +0: patch_dim ....................................... 16 +0: pipeline_model_parallel_size .................... 1 +0: position_embedding_type ......................... PositionEmbeddingType.absolute +0: pp_partition_method ............................. None +0: profile_backward ................................ False +0: query_in_block_prob ............................. 0.1 +0: rampup_batch_size ............................... None +0: rank ............................................ 0 +0: remote_device ................................... none +0: reset_attention_mask ............................ False +0: reset_position_ids .............................. False +0: reset_progress .................................. True +0: retriever_report_topk_accuracies ................ [] +0: retriever_score_scaling ......................... False +0: retriever_seq_length ............................ 256 +0: reweight_loss_based_on_position_frequency ....... False +0: sample_rate ..................................... 1.0 +0: save ............................................ checkpoints_83m20b1b5 +0: save_interval ................................... 1000 +0: scatter_gather_tensors_in_pipeline .............. True +0: scattered_embeddings ............................ False +0: seed ............................................ 1234 +0: seq_length ...................................... 2048 +0: sgd_momentum .................................... 0.9 +0: short_seq_prob .................................. 0.1 +0: skip_train_iteration_range ...................... None +0: split ........................................... None +0: split_transformers .............................. False +0: sync_tp_duplicated_parameters ................... False +0: synchronize_each_layer .......................... False +0: tensor_model_parallel_size ...................... 1 +0: tensorboard_dir ................................. tensorboard_83m20b1b5val +0: tensorboard_log_interval ........................ 1 +0: tensorboard_queue_size .......................... 5 +0: test_weighted_split_paths ....................... None +0: test_weighted_split_paths_path .................. None +0: tile_factor ..................................... 1 +0: titles_data_path ................................ None +0: tokenizer_name_or_path .......................... None +0: tokenizer_type .................................. GPT2BPETokenizer +0: train_iters ..................................... None +0: train_samples ................................... 1 +0: train_tokens .................................... None +0: train_weighted_split_names ...................... ['train'] +0: train_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document']] +0: train_weighted_split_paths_path ................. None +0: train_weighted_split_splits ..................... [['0:1']] +0: train_weighted_split_weights .................... [['1.0']] +0: universal_checkpoint ............................ False +0: use_bnb_optimizer ............................... False +0: use_checkpoint_lr_scheduler ..................... False +0: use_contiguous_buffers_in_ddp ................... True +0: use_cpu_initialization .......................... None +0: use_one_sent_docs ............................... False +0: use_pin_memory .................................. False +0: valid_num_workers ............................... 2 +0: valid_weighted_split_names ...................... ['validation'] +0: valid_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document']] +0: valid_weighted_split_paths_path ................. None +0: valid_weighted_split_splits ..................... [['0:1']] +0: valid_weighted_split_weights .................... [['1.0']] +0: virtual_pipeline_model_parallel_size ............ None +0: vocab_extra_ids ................................. 0 +0: vocab_file ...................................... gpt2/vocab.json +0: weight_decay .................................... 0.1 +0: world_size ...................................... 64 +0: zero_allgather_bucket_size ...................... 0.0 +0: zero_contigious_gradients ....................... False +0: zero_reduce_bucket_size ......................... 0.0 +0: zero_reduce_scatter ............................. False +0: zero_stage ...................................... 0 +0: -------------------- end of arguments --------------------- +0: setting number of micro-batches to constant 1 +0: > building GPT2BPETokenizer tokenizer ... +0: > padded vocab (size: 50257) with 47 dummy tokens (new size: 50304) +0: DeepSpeed general environment info: +0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] +0: torch version .................... 1.13.0+rocm5.2 +0: torch cuda version ............... None +0: torch hip version ................ 5.2.21151-afdc89f8 +0: nvcc version ..................... None +0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] +0: deepspeed info ................... 0.7.5, unknown, unknown +0: deepspeed wheel compiled w. ...... torch 1.13, hip 5.1 +7: > setting tensorboard ... +0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** +0: > initializing torch distributed ... +0: [2023-02-09 22:43:05,460] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +0: > initializing tensor model parallel with size 1 +0: > initializing pipeline model parallel with size 1 +0: > setting random seeds to 1234 ... +0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 +0: > compiling dataset index builder ... +0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: make: Nothing to be done for 'default'. +0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: >>> done with dataset index builder. Compilation time: 0.095 seconds +0: > compiling and loading fused kernels ... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 102 +0: ninja: no work to do. +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 67 +0: ninja: no work to do. +0: >>> done with compiling and loading fused kernels. Compilation time: 18.194 seconds +0: time to initialize megatron (seconds): -17.490 +0: [after megatron is initialized] datetime: 2023-02-09 22:43:26 +0: building GPT model ... +0: [2023-02-09 22:43:26,647] [INFO] [utils.py:827:see_memory_usage] Before Building Model +0: [2023-02-09 22:43:26,648] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB +0: [2023-02-09 22:43:26,648] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.11 GB, percent = 6.0% +0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None +0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=1, model=0): 1, ProcessCoord(pipe=0, data=2, model=0): 2, ProcessCoord(pipe=0, data=3, model=0): 3, ProcessCoord(pipe=0, data=4, model=0): 4, ProcessCoord(pipe=0, data=5, model=0): 5, ProcessCoord(pipe=0, data=6, model=0): 6, ProcessCoord(pipe=0, data=7, model=0): 7, ProcessCoord(pipe=0, data=8, model=0): 8, ProcessCoord(pipe=0, data=9, model=0): 9, ProcessCoord(pipe=0, data=10, model=0): 10, ProcessCoord(pipe=0, data=11, model=0): 11, ProcessCoord(pipe=0, data=12, model=0): 12, ProcessCoord(pipe=0, data=13, model=0): 13, ProcessCoord(pipe=0, data=14, model=0): 14, ProcessCoord(pipe=0, data=15, model=0): 15, ProcessCoord(pipe=0, data=16, model=0): 16, ProcessCoord(pipe=0, data=17, model=0): 17, ProcessCoord(pipe=0, data=18, model=0): 18, ProcessCoord(pipe=0, data=19, model=0): 19, ProcessCoord(pipe=0, data=20, model=0): 20, ProcessCoord(pipe=0, data=21, model=0): 21, ProcessCoord(pipe=0, data=22, model=0): 22, ProcessCoord(pi +0: pe=0, data=23, model=0): 23, ProcessCoord(pipe=0, data=24, model=0): 24, ProcessCoord(pipe=0, data=25, model=0): 25, ProcessCoord(pipe=0, data=26, model=0): 26, ProcessCoord(pipe=0, data=27, model=0): 27, ProcessCoord(pipe=0, data=28, model=0): 28, ProcessCoord(pipe=0, data=29, model=0): 29, ProcessCoord(pipe=0, data=30, model=0): 30, ProcessCoord(pipe=0, data=31, model=0): 31, ProcessCoord(pipe=0, data=32, model=0): 32, ProcessCoord(pipe=0, data=33, model=0): 33, ProcessCoord(pipe=0, data=34, model=0): 34, ProcessCoord(pipe=0, data=35, model=0): 35, ProcessCoord(pipe=0, data=36, model=0): 36, ProcessCoord(pipe=0, data=37, model=0): 37, ProcessCoord(pipe=0, data=38, model=0): 38, ProcessCoord(pipe=0, data=39, model=0): 39, ProcessCoord(pipe=0, data=40, model=0): 40, ProcessCoord(pipe=0, data=41, model=0): 41, ProcessCoord(pipe=0, data=42, model=0): 42, ProcessCoord(pipe=0, data=43, model=0): 43, ProcessCoord(pipe=0, data=44, model=0): 44, ProcessCoord(pipe=0, data=45, model=0): 45, ProcessCoord(pipe=0, data=4 +0: 6, model=0): 46, ProcessCoord(pipe=0, data=47, model=0): 47, ProcessCoord(pipe=0, data=48, model=0): 48, ProcessCoord(pipe=0, data=49, model=0): 49, ProcessCoord(pipe=0, data=50, model=0): 50, ProcessCoord(pipe=0, data=51, model=0): 51, ProcessCoord(pipe=0, data=52, model=0): 52, ProcessCoord(pipe=0, data=53, model=0): 53, ProcessCoord(pipe=0, data=54, model=0): 54, ProcessCoord(pipe=0, data=55, model=0): 55, ProcessCoord(pipe=0, data=56, model=0): 56, ProcessCoord(pipe=0, data=57, model=0): 57, ProcessCoord(pipe=0, data=58, model=0): 58, ProcessCoord(pipe=0, data=59, model=0): 59, ProcessCoord(pipe=0, data=60, model=0): 60, ProcessCoord(pipe=0, data=61, model=0): 61, ProcessCoord(pipe=0, data=62, model=0): 62, ProcessCoord(pipe=0, data=63, model=0): 63} +0: [2023-02-09 22:43:28,641] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer +0: stage=0 layers=17 +0: 0: _to_float16 +0: 1: EmbeddingPipe +0: 2: +0: 3: ParallelTransformerLayerPipe +0: 4: ParallelTransformerLayerPipe +0: 5: ParallelTransformerLayerPipe +0: 6: ParallelTransformerLayerPipe +0: 7: ParallelTransformerLayerPipe +0: 8: ParallelTransformerLayerPipe +0: 9: ParallelTransformerLayerPipe +0: 10: ParallelTransformerLayerPipe +0: 11: ParallelTransformerLayerPipe +0: 12: ParallelTransformerLayerPipe +0: 13: undo +0: 14: MixedFusedLayerNorm +0: 15: EmbeddingPipe +0: 16: float16_to_fp32 +0: loss: CrossEntropy +0: [2023-02-09 22:43:28,917] [INFO] [utils.py:827:see_memory_usage] After Building Model +0: [2023-02-09 22:43:28,917] [INFO] [utils.py:828:see_memory_usage] MA 0.16 GB Max_MA 0.16 GB CA 0.17 GB Max_CA 0 GB +0: [2023-02-09 22:43:28,917] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.13 GB, percent = 6.0% +0: setting training iterations to 0 +0: > learning rate decay style: cosine +0: DeepSpeed is enabled. +0: [2023-02-09 22:43:28,918] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown +0: [2023-02-09 22:43:41,029] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +0: [2023-02-09 22:43:41,030] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer +0: [2023-02-09 22:43:41,030] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer +0: [2023-02-09 22:43:41,032] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +0: [2023-02-09 22:43:41,032] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer +0: [2023-02-09 22:43:41,151] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer +0: [2023-02-09 22:43:41,152] [INFO] [utils.py:828:see_memory_usage] MA 0.16 GB Max_MA 0.16 GB CA 0.17 GB Max_CA 0 GB +0: [2023-02-09 22:43:41,152] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.81 GB, percent = 6.1% +2: Time to load utils op: 0.11050724983215332 seconds +2: Time to load utils op: 0.11052346229553223 seconds +2: Time to load utils op: 0.11053776741027832 seconds +2: Time to load utils op: 0.1105506420135498 secondsTime to load utils op: 0.11055111885070801 seconds +2: +2: Time to load utils op: 0.11057925224304199 seconds +2: Time to load utils op: 0.11058998107910156 secondsTime to load utils op: 0.11058282852172852 seconds +2: +3: Time to load utils op: 0.11081337928771973 seconds +3: Time to load utils op: 0.11085247993469238 secondsTime to load utils op: 0.1108541488647461 seconds +3: +3: Time to load utils op: 0.11086034774780273 seconds +3: Time to load utils op: 0.11088991165161133 seconds +3: Time to load utils op: 0.11088252067565918 secondsTime to load utils op: 0.1108846664428711 secondsTime to load utils op: 0.11089181900024414 seconds +3: +3: +4: Time to load utils op: 0.11158561706542969 seconds +4: Time to load utils op: 0.1116034984588623 seconds +4: Time to load utils op: 0.11161327362060547 seconds +4: Time to load utils op: 0.11163473129272461 seconds +4: Time to load utils op: 0.11163520812988281 seconds +4: Time to load utils op: 0.11165308952331543 seconds +4: Time to load utils op: 0.1116483211517334 secondsTime to load utils op: 0.11165618896484375 seconds +4: +5: Time to load utils op: 0.11072444915771484 seconds +5: Time to load utils op: 0.11074662208557129 seconds +5: Time to load utils op: 0.11075448989868164 seconds +5: Time to load utils op: 0.11077141761779785 secondsTime to load utils op: 0.11077117919921875 seconds +5: +5: Time to load utils op: 0.11078929901123047 secondsTime to load utils op: 0.11078834533691406 seconds +5: +5: Time to load utils op: 0.11079549789428711 seconds +7: Time to load utils op: 0.10970878601074219 secondsTime to load utils op: 0.109710693359375 seconds +7: +7: Time to load utils op: 0.10972237586975098 seconds +7: Time to load utils op: 0.10972356796264648 seconds +7: Time to load utils op: 0.10972929000854492 secondsTime to load utils op: 0.10973405838012695 seconds +7: +7: Time to load utils op: 0.10973024368286133 secondsTime to load utils op: 0.10973715782165527 seconds +7: +6: Time to load utils op: 0.11081600189208984 secondsTime to load utils op: 0.11081457138061523 secondsTime to load utils op: 0.11081314086914062 secondsTime to load utils op: 0.1108100414276123 secondsTime to load utils op: 0.11082100868225098 seconds +6: Time to load utils op: 0.11081695556640625 seconds +6: Time to load utils op: 0.1108236312866211 seconds +6: +6: +6: +6: +6: Time to load utils op: 0.11082601547241211 seconds +0: Time to load utils op: 0.10738539695739746 seconds +0: Time to load utils op: 0.20477509498596191 seconds +0: Time to load utils op: 0.20524001121520996 seconds +0: Time to load utils op: 0.20520401000976562 seconds +0: Time to load utils op: 0.20531392097473145 seconds +0: Time to load utils op: 0.20486164093017578 seconds +0: Time to load utils op: 0.20546841621398926 seconds +0: Time to load utils op: 0.20581698417663574 seconds +1: Time to load utils op: 0.21094250679016113 seconds +1: Time to load utils op: 0.21079373359680176 seconds +1: Time to load utils op: 0.2117149829864502 seconds +1: Time to load utils op: 0.21197819709777832 seconds +1: Time to load utils op: 0.21204590797424316 seconds +1: Time to load utils op: 0.21187138557434082 seconds +1: Time to load utils op: 0.21103167533874512 secondsTime to load utils op: 0.21103668212890625 seconds +1: +3: Time to load utils op: 0.0008714199066162109 seconds +3: Time to load utils op: 0.001094818115234375 seconds +3: Time to load utils op: 0.0011661052703857422 seconds +3: Time to load utils op: 0.0012979507446289062 seconds +3: Time to load utils op: 0.0012667179107666016 seconds +3: Time to load utils op: 0.0012784004211425781 seconds +3: Time to load utils op: 0.0013175010681152344 seconds +3: Time to load utils op: 0.0013477802276611328 seconds +7: Time to load utils op: 0.0006194114685058594 seconds +7: Time to load utils op: 0.0007753372192382812 seconds +7: Time to load utils op: 0.0007531642913818359 seconds +7: Time to load utils op: 0.0008656978607177734 seconds +7: Time to load utils op: 0.0010833740234375 seconds +7: Time to load utils op: 0.0011832714080810547 seconds +7: Time to load utils op: 0.0011663436889648438 seconds +7: Time to load utils op: 0.0012052059173583984 seconds +2: Time to load utils op: 0.0006024837493896484 seconds +2: Time to load utils op: 0.0010306835174560547 seconds +2: Time to load utils op: 0.0010521411895751953 seconds +2: Time to load utils op: 0.0012471675872802734 seconds +2: Time to load utils op: 0.0013339519500732422 seconds +2: Time to load utils op: 0.0012717247009277344 secondsTime to load utils op: 0.001260995864868164 seconds +2: +2: Time to load utils op: 0.001378774642944336 seconds +4: Time to load utils op: 0.0008676052093505859 seconds +4: Time to load utils op: 0.0008156299591064453 secondsTime to load utils op: 0.00080108642578125 seconds +4: +4: Time to load utils op: 0.0009839534759521484 seconds +4: Time to load utils op: 0.0009768009185791016 secondsTime to load utils op: 0.0010569095611572266 seconds +4: +4: Time to load utils op: 0.0010123252868652344 seconds +4: Time to load utils op: 0.0010883808135986328 seconds +1: Time to load utils op: 0.0010993480682373047 seconds +1: Time to load utils op: 0.0013799667358398438 seconds +1: Time to load utils op: 0.001146078109741211 secondsTime to load utils op: 0.001394033432006836 seconds +1: +1: Time to load utils op: 0.0011646747589111328 seconds +1: Time to load utils op: 0.0013766288757324219 seconds +1: Time to load utils op: 0.0013599395751953125 seconds +1: Time to load utils op: 0.001379251480102539 seconds +5: Time to load utils op: 0.0008325576782226562 seconds +6: Time to load utils op: 0.001008749008178711 secondsTime to load utils op: 0.0009496212005615234 seconds +6: +6: Time to load utils op: 0.0009789466857910156 seconds +6: Time to load utils op: 0.0012297630310058594 seconds +6: Time to load utils op: 0.0011970996856689453 seconds +6: Time to load utils op: 0.0012052059173583984 seconds +6: Time to load utils op: 0.0011756420135498047 seconds +6: Time to load utils op: 0.0012738704681396484 seconds +5: Time to load utils op: 0.0011360645294189453 seconds +5: Time to load utils op: 0.0010707378387451172 secondsTime to load utils op: 0.0011234283447265625 seconds +5: +5: Time to load utils op: 0.0010962486267089844 seconds +5: Time to load utils op: 0.0010914802551269531 seconds +5: Time to load utils op: 0.0011320114135742188 seconds +5: Time to load utils op: 0.0011856555938720703 seconds +0: Time to load utils op: 0.0006012916564941406 seconds +0: Time to load utils op: 0.0005633831024169922 seconds +0: Time to load utils op: 0.00036787986755371094 seconds +0: Time to load utils op: 0.0005767345428466797 seconds +0: Time to load utils op: 0.0004687309265136719 seconds +0: Time to load utils op: 0.0005748271942138672 seconds +0: Time to load utils op: 0.0003857612609863281 seconds +0: [2023-02-09 22:43:41,387] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 +0: [2023-02-09 22:43:41,388] [INFO] [utils.py:828:see_memory_usage] MA 0.16 GB Max_MA 0.16 GB CA 0.17 GB Max_CA 0 GB +0: [2023-02-09 22:43:41,388] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.92 GB, percent = 6.1% +0: [2023-02-09 22:43:41,515] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 +0: [2023-02-09 22:43:41,515] [INFO] [utils.py:828:see_memory_usage] MA 0.37 GB Max_MA 0.37 GB CA 0.48 GB Max_CA 0 GB +0: [2023-02-09 22:43:41,515] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.95 GB, percent = 6.1% +0: [2023-02-09 22:43:41,618] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 +0: [2023-02-09 22:43:41,619] [INFO] [utils.py:828:see_memory_usage] MA 0.37 GB Max_MA 0.37 GB CA 0.48 GB Max_CA 0 GB +0: [2023-02-09 22:43:41,619] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.95 GB, percent = 6.1% +0: [2023-02-09 22:43:41,722] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 +0: [2023-02-09 22:43:41,722] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-09 22:43:41,723] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.95 GB, percent = 6.1% +0: [2023-02-09 22:43:41,823] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 +0: [2023-02-09 22:43:41,824] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-09 22:43:41,824] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.95 GB, percent = 6.1% +0: [2023-02-09 22:43:41,927] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 +0: [2023-02-09 22:43:41,927] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-09 22:43:41,927] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.95 GB, percent = 6.1% +0: [2023-02-09 22:43:42,028] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer +0: [2023-02-09 22:43:42,028] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-09 22:43:42,029] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.95 GB, percent = 6.1% +0: [2023-02-09 22:43:42,134] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer +0: [2023-02-09 22:43:42,135] [INFO] [utils.py:828:see_memory_usage] MA 0.48 GB Max_MA 0.48 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-09 22:43:42,135] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.95 GB, percent = 6.1% +0: [2023-02-09 22:43:42,235] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer +0: [2023-02-09 22:43:42,236] [INFO] [utils.py:828:see_memory_usage] MA 0.48 GB Max_MA 0.48 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-09 22:43:42,236] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.95 GB, percent = 6.1% +0: [2023-02-09 22:43:42,236] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +0: [2023-02-09 22:43:42,236] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler +0: [2023-02-09 22:43:42,236] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = +0: [2023-02-09 22:43:42,236] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0002, 0.0002, 0.0002], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] activation_checkpointing_config { +0: "partition_activations": false, +0: "contiguous_memory_optimization": false, +0: "cpu_checkpointing": false, +0: "number_checkpoints": null, +0: "synchronize_checkpoint_boundary": false, +0: "profile": false +0: } +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] amp_enabled .................. False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] amp_params ................... False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] autotuning_config ............ { +0: "enabled": false, +0: "start_step": null, +0: "end_step": null, +0: "metric_path": null, +0: "arg_mappings": null, +0: "metric": "throughput", +0: "model_info": null, +0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", +0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", +0: "overwrite": true, +0: "fast": true, +0: "start_profile_step": 3, +0: "end_profile_step": 5, +0: "tuner_type": "gridsearch", +0: "tuner_early_stopping": 5, +0: "tuner_num_trials": 50, +0: "model_info_path": null, +0: "mp_size": 1, +0: "max_train_batch_size": null, +0: "min_train_batch_size": 1, +0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, +0: "min_train_micro_batch_size_per_gpu": 1, +0: "num_tuning_micro_batch_sizes": 3 +0: } +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] bfloat16_enabled ............. True +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] comms_config ................. +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] communication_data_type ...... None +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa +0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] curriculum_enabled ........... False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] curriculum_params ............ False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] dataloader_drop_last ......... False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] disable_allgather ............ False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] dump_state ................... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ bert.encoder.layer +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] elasticity_enabled ........... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] flops_profiler_config ........ { +0: "enabled": false, +0: "profile_step": 1, +0: "module_depth": -1, +0: "top_modules": 1, +0: "detailed": true, +0: "output_file": null +0: } +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] fp16_auto_cast ............... None +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] fp16_enabled ................. False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] global_rank .................. 0 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 1 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] load_universal_checkpoint .... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] loss_scale ................... 1.0 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] memory_breakdown ............. False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] monitor_config ............... +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] nebula_config ................ { +0: "enabled": false, +0: "persistent_storage_path": null, +0: "persistent_time_interval": 100, +0: "num_of_version_in_retention": 2, +0: "enable_nebula_load": true, +0: "load_path": null +0: } +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] optimizer_name ............... None +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] optimizer_params ............. None +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] pld_enabled .................. False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] pld_params ................... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] prescale_gradients ........... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] scheduler_name ............... None +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] scheduler_params ............. None +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] sparse_attention ............. None +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] steps_per_print .............. 2000 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] train_batch_size ............. 256 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 4 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] use_node_local_storage ....... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] wall_clock_breakdown ......... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] world_size ................... 64 +0: [2023-02-09 22:43:42,239] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False +0: [2023-02-09 22:43:42,239] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False +0: [2023-02-09 22:43:42,239] [INFO] [config.py:1011:print] zero_enabled ................. False +0: [2023-02-09 22:43:42,239] [INFO] [config.py:1011:print] zero_optimization_stage ...... 0 +0: [2023-02-09 22:43:42,239] [INFO] [config.py:996:print_user_config] json = { +0: "train_micro_batch_size_per_gpu": 4, +0: "train_batch_size": 256, +0: "gradient_clipping": 1.0, +0: "zero_optimization": { +0: "stage": 0 +0: }, +0: "bf16": { +0: "enabled": true +0: }, +0: "steps_per_print": 2.000000e+03, +0: "wall_clock_breakdown": false +0: } +0: Time to load utils op: 0.00041747093200683594 seconds +0: [2023-02-09 22:43:42,239] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=1 micro_batch_size=4 +0: [2023-02-09 22:43:42,278] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=17 [0, 17) STAGE_PARAMS=82741760 (82.742M) TOTAL_PARAMS=82741760 (82.742M) UNIQUE_PARAMS=82741760 (82.742M) +0: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,556] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,556] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,556] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,556] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,556] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,556] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,572] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,572] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,573] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,573] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,639] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,639] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,838] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,846] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,961] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,961] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:42,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:42,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:43,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:43,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:43,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:43,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:43,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:43,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:43,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:43,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:43,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:43,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:43,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:43,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:43,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:43,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:43,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:43,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:43,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:43,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:43,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:43,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:43,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:43,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,285] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:43,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:43,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:43,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:43,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:43,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:43,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:43,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:43,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:43,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:43,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:43,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:43,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:43,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:43,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:43,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:43,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:43,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:43,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:43,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:43,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:43,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:43,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:43,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:43,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:43,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:43,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:43,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:43,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:43,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:43,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:43,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:43,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:43,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:43,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:43,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:43,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:43,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:43,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:43,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:43,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:43,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:43,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:43,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:43,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:43,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:43,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:43,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:43,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:43,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:43,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:43,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:43,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:43,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: > overriding learning rate value to 0.0002 +0: > overriding minimum learning rate value to 2e-05 +0: > overriding warmup iterations value to 0 +0: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: > overriding total number of iterations value to 1 +0: > overriding decay style value to cosine +5: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:43:45,189] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 13 +1: [2023-02-09 22:43:45,190] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 13 +3: [2023-02-09 22:43:45,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:45,192] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 29 +3: [2023-02-09 22:43:45,194] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 29 +1: [2023-02-09 22:43:45,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:43:45,194] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 10 +1: [2023-02-09 22:43:45,195] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 10 +4: [2023-02-09 22:43:45,196] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:45,196] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 37 +4: [2023-02-09 22:43:45,197] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 37 +6: [2023-02-09 22:43:45,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,198] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 53 +2: [2023-02-09 22:43:45,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,198] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 23 +7: [2023-02-09 22:43:45,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,199] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 58 +6: [2023-02-09 22:43:45,199] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 53 +3: [2023-02-09 22:43:45,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:45,199] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 28 +7: [2023-02-09 22:43:45,199] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 63 +2: [2023-02-09 22:43:45,199] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 23 +7: [2023-02-09 22:43:45,200] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 58 +7: [2023-02-09 22:43:45,200] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 63 +3: [2023-02-09 22:43:45,201] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 28 +6: [2023-02-09 22:43:45,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,201] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 51 +2: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,202] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 18 +6: [2023-02-09 22:43:45,202] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 51 +0: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:43:45,202] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 6 +0: [2023-02-09 22:43:45,202] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 5 +6: [2023-02-09 22:43:45,202] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 54 +2: [2023-02-09 22:43:45,203] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 18 +3: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,204] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 54 +3: [2023-02-09 22:43:45,204] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 31 +3: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:45,204] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 26 +0: [2023-02-09 22:43:45,204] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 5 +0: [2023-02-09 22:43:45,204] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 6 +4: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:45,205] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 34 +3: [2023-02-09 22:43:45,205] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 31 +3: [2023-02-09 22:43:45,205] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 26 +1: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:43:45,206] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 12 +4: [2023-02-09 22:43:45,206] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 34 +0: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:43:45,207] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 12 +0: [2023-02-09 22:43:45,207] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 4 +4: [2023-02-09 22:43:45,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:45,208] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 35 +0: [2023-02-09 22:43:45,208] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 4 +4: [2023-02-09 22:43:45,209] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 35 +3: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:45,210] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 25 +3: [2023-02-09 22:43:45,211] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 25 +7: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,212] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 62 +4: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:45,213] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 32 +7: [2023-02-09 22:43:45,213] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 62 +4: [2023-02-09 22:43:45,214] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 32 +6: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,215] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 48 +6: [2023-02-09 22:43:45,217] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 48 +7: [2023-02-09 22:43:45,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,218] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 59 +6: [2023-02-09 22:43:45,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,218] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 52 +6: [2023-02-09 22:43:45,218] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 55 +7: [2023-02-09 22:43:45,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,219] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 60 +7: [2023-02-09 22:43:45,219] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 59 +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,220] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 61 +6: [2023-02-09 22:43:45,220] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 55 +6: [2023-02-09 22:43:45,220] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 52 +3: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:45,220] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 27 +7: [2023-02-09 22:43:45,220] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 60 +7: [2023-02-09 22:43:45,221] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 61 +3: [2023-02-09 22:43:45,222] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 27 +4: [2023-02-09 22:43:45,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:45,222] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 38 +4: [2023-02-09 22:43:45,224] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 38 +2: [2023-02-09 22:43:45,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,224] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 22 +2: [2023-02-09 22:43:45,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,225] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 16 +3: [2023-02-09 22:43:45,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:45,225] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 30 +2: [2023-02-09 22:43:45,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,225] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 22 +2: [2023-02-09 22:43:45,225] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 19 +2: [2023-02-09 22:43:45,226] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 16 +3: [2023-02-09 22:43:45,226] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 30 +2: [2023-02-09 22:43:45,227] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 19 +1: [2023-02-09 22:43:45,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:43:45,227] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 14 +1: [2023-02-09 22:43:45,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:43:45,228] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 11 +5: [2023-02-09 22:43:45,227] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 41 +5: [2023-02-09 22:43:45,227] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 42 +5: [2023-02-09 22:43:45,227] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 47 +5: [2023-02-09 22:43:45,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,227] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 40 +5: [2023-02-09 22:43:45,228] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 47 +5: [2023-02-09 22:43:45,228] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 41 +5: [2023-02-09 22:43:45,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,229] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 44 +5: [2023-02-09 22:43:45,229] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 42 +1: [2023-02-09 22:43:45,229] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 14 +6: [2023-02-09 22:43:45,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,230] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 49 +1: [2023-02-09 22:43:45,230] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 11 +5: [2023-02-09 22:43:45,230] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 40 +1: [2023-02-09 22:43:45,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:43:45,231] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 15 +6: [2023-02-09 22:43:45,231] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 49 +2: [2023-02-09 22:43:45,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,231] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 44 +2: [2023-02-09 22:43:45,231] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 21 +1: [2023-02-09 22:43:45,232] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 15 +2: [2023-02-09 22:43:45,232] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 21 +5: [2023-02-09 22:43:45,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,233] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 45 +3: [2023-02-09 22:43:45,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:45,233] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 24 +0: [2023-02-09 22:43:45,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:43:45,233] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 2 +1: [2023-02-09 22:43:45,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,234] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 45 +0: [2023-02-09 22:43:45,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:43:45,234] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 8 +0: [2023-02-09 22:43:45,234] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 0 +3: [2023-02-09 22:43:45,234] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 24 +2: [2023-02-09 22:43:45,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:43:45,235] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 2 +2: [2023-02-09 22:43:45,235] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 17 +1: [2023-02-09 22:43:45,235] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 8 +0: [2023-02-09 22:43:45,235] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 0 +0: could not find arguments in the checkpoint ... +0: checkpoint version 3.0 +2: [2023-02-09 22:43:45,236] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 17 +6: [2023-02-09 22:43:45,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,245] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 50 +7: [2023-02-09 22:43:45,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,246] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 50 +7: [2023-02-09 22:43:45,246] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 57 +7: [2023-02-09 22:43:45,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,247] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 56 +7: [2023-02-09 22:43:45,247] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 57 +7: [2023-02-09 22:43:45,248] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 56 +2: [2023-02-09 22:43:45,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,250] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 20 +2: [2023-02-09 22:43:45,251] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 20 +5: [2023-02-09 22:43:45,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,253] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 43 +5: [2023-02-09 22:43:45,254] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 43 +5: [2023-02-09 22:43:45,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,255] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 46 +5: [2023-02-09 22:43:45,256] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 46 +0: [2023-02-09 22:43:45,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:43:45,274] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 7 +0: [2023-02-09 22:43:45,275] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 7 +0: [2023-02-09 22:43:45,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:43:45,289] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 1 +0: [2023-02-09 22:43:45,291] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 1 +4: [2023-02-09 22:43:45,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:45,292] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 36 +4: [2023-02-09 22:43:45,293] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 36 +0: [2023-02-09 22:43:45,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:43:45,364] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 3 +0: [2023-02-09 22:43:45,366] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 3 +1: [2023-02-09 22:43:45,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:43:45,844] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 9 +1: [2023-02-09 22:43:45,845] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 9 +4: [2023-02-09 22:43:46,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:46,439] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 33 +4: [2023-02-09 22:43:46,440] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 33 +4: [2023-02-09 22:43:46,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b1b5/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:46,519] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 39 +4: [2023-02-09 22:43:46,520] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 39 +0: successfully loaded checkpoint from checkpoints_83m20b1b5 at iteration 0 +7: time (ms) | load-checkpoint: 4245.36 +0: estimated model parameters: 0.08274176 +0: estimated model parameters without embeddings: 0.04923648 +0: [after model, optimizer, and learning rate scheduler are built] datetime: 2023-02-09 22:43:46 +0: > building train, validation, and test datasets ... +0: > datasets target sizes (minimum size): +0: train: 1 +0: validation: 25600 +0: test: 25600 +0: > building train, validation, and test datasets for GPT ... +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.038720 seconds +0: number of documents: 41786294 +0: > dataset split: +0: train: +0: document indices in [0, 41786294) total of 41786294 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document_train_indexmap_1ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document_train_indexmap_1ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document_train_indexmap_1ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.056 seconds +0: total number of samples: 9767463 +0: total number of epochs: 1 +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.038154 seconds +0: number of documents: 364608 +0: > dataset split: +0: validation: +0: document indices in [0, 364608) total of 364608 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.023 seconds +0: total number of samples: 84978 +0: total number of epochs: 1 +0: > finished creating GPT datasets ... +0: [after dataloaders are built] datetime: 2023-02-09 22:44:01 +0: done with setup ... +0: training ... +7: time (ms) | model-and-optimizer-setup: 20231.71 | train/valid/test-data-iterators-setup: 13823.79 +0: [after training is done] datetime: 2023-02-09 22:44:01 +0: [2023-02-09 22:44:01,454] [INFO] [checkpointing.py:553:forward] Activation Checkpointing Information +0: [2023-02-09 22:44:01,454] [INFO] [checkpointing.py:554:forward] ----Partition Activations False, CPU CHECKPOINTING False +0: [2023-02-09 22:44:01,454] [INFO] [checkpointing.py:557:forward] ----contiguous Memory Checkpointing False with None total layers +0: [2023-02-09 22:44:01,454] [INFO] [checkpointing.py:560:forward] ----Synchronization False +0: [2023-02-09 22:44:01,454] [INFO] [checkpointing.py:561:forward] ----Profiling time in checkpointing False +7: ----------------------------------------------------------------------------------------------------------------- +7: validation loss at the end of training for val data | lm loss value: 3.618937E+00 | lm loss PPL: 3.729791E+01 | +7: ----------------------------------------------------------------------------------------------------------------- +END 2820865: Thu 09 Feb 2023 10:44:22 PM EET diff --git a/83m20b1b5/sbatch_83m20b1b5.sh b/83m20b1b5/sbatch_83m20b1b5.sh new file mode 100755 index 0000000000000000000000000000000000000000..9e534aa0c08aea61f0ac056a32049e1ba32e4ffb --- /dev/null +++ b/83m20b1b5/sbatch_83m20b1b5.sh @@ -0,0 +1,168 @@ +#!/bin/bash +#SBATCH --exclude=nid007571,nid007112,nid006774,nid007502,nid007506,nid007507,nid005145,nid006692,nid007218,nid007123,nid006124,nid006123,nid007496,nid007237,nid006852,nid007206,nid006947,nid007212,nid006977,nid007222,nid005444,nid007219,nid007493,nid007221,nid005300,nid005619,nid006118,nid005203,nid006113,nid006481,nid007077,nid005208,nid005207,nid005879,nid005901 +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 2-0:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=83m20b1b5 + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT +mkdir -p $CHECKPOINT_PATH +mkdir -p $TENSORBOARD_PATH + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train1b5.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_1B5_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_74M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 19873180000 +# -> Samples: 9703701 +TRAIN_SAMPLES=9_703_701 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 97_037 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --loss-scale 12 \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + --checkpoint-activations \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/83m20b1b5/sbatch_83m20b1b5val.sh b/83m20b1b5/sbatch_83m20b1b5val.sh new file mode 100644 index 0000000000000000000000000000000000000000..920fda932a43100364f94589574fa89b4066f8dc --- /dev/null +++ b/83m20b1b5/sbatch_83m20b1b5val.sh @@ -0,0 +1,173 @@ +#!/bin/bash +#SBATCH --exclude=nid007571,nid007112,nid006774,nid007502,nid007506,nid007507,nid005145,nid006692,nid007218,nid007123,nid006124,nid006123,nid007496,nid007237,nid006852,nid007206,nid006947,nid007212,nid006977,nid007222,nid005444,nid007219,nid007493,nid007221,nid005300,nid005619,nid006118,nid005203,nid006113,nid006481,nid007077,nid005208,nid005207,nid005879,nid005901 +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 2-0:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=83m20b1b5val +VARIANT_CKPT=83m20b1b5 + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT_CKPT +TENSORBOARD_PATH=tensorboard_$VARIANT +mkdir -p $CHECKPOINT_PATH +mkdir -p $TENSORBOARD_PATH + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train20b.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_74M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 19873180000 +# -> Samples: 9703701 +TRAIN_SAMPLES=1 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 0 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + --no-load-optim \ + --reset-progress \ + --override-lr-scheduler \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --loss-scale 12 \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + --checkpoint-activations \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1 \ + --eval-iters 100 \ + --eval-only true \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/83m20b1b5/tensorboard_83m20b1b5/events.out.tfevents.1675516829.nid006955.77430.0 b/83m20b1b5/tensorboard_83m20b1b5/events.out.tfevents.1675516829.nid006955.77430.0 new file mode 100644 index 0000000000000000000000000000000000000000..95477f5adfd6157d0e60b9c21333e9bf0b53fb8f --- /dev/null +++ b/83m20b1b5/tensorboard_83m20b1b5/events.out.tfevents.1675516829.nid006955.77430.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2b21db5863de92de7fcfaffd21e98f31903a5baf799ca48c7a0ea229a15a650 +size 67726209 diff --git a/83m20b1b5/tensorboard_83m20b1b5val/events.out.tfevents.1675555121.nid006665.100183.0 b/83m20b1b5/tensorboard_83m20b1b5val/events.out.tfevents.1675555121.nid006665.100183.0 new file mode 100644 index 0000000000000000000000000000000000000000..623f930615df07b657d391081d71adc3b892b1b2 --- /dev/null +++ b/83m20b1b5/tensorboard_83m20b1b5val/events.out.tfevents.1675555121.nid006665.100183.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d74c7aae0fe0b6a3533f50890923631738f0d85ad7eeca820ccb3ac073af4f26 +size 980 diff --git a/83m20b1b5/tensorboard_83m20b1b5val/events.out.tfevents.1675975385.nid006832.94701.0 b/83m20b1b5/tensorboard_83m20b1b5val/events.out.tfevents.1675975385.nid006832.94701.0 new file mode 100644 index 0000000000000000000000000000000000000000..5968ffbb617cb3c913f7304ede988646e56a29c1 --- /dev/null +++ b/83m20b1b5/tensorboard_83m20b1b5val/events.out.tfevents.1675975385.nid006832.94701.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d81edf9fe83088b750c9930d4b69c720439ffb25ddb704614a37b20e21d2388 +size 980 diff --git a/83m20b1b5/transformers/config.json b/83m20b1b5/transformers/config.json new file mode 100644 index 0000000000000000000000000000000000000000..553a76e467e8e403cf19f94f84cd1613f403f329 --- /dev/null +++ b/83m20b1b5/transformers/config.json @@ -0,0 +1 @@ +{"vocab_size": 50304, "n_positions": 2048, "n_embd": 640, "n_layer": 10, "n_head": 10, "n_inner": 2560, "activation_function": "gelu", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": null, "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "chunk_size_feed_forward": 0, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["GPT2LMHeadModel"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "pad_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "", "transformers_version": "4.25.0.dev0", "n_ctx": 1024, "gradient_checkpointing": false, "model_type": "gpt2"} \ No newline at end of file diff --git a/83m20b1b5/transformers/pytorch_model.bin b/83m20b1b5/transformers/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6aa00376f594bd7fb1c19b4bc1510dcf907ecc80 --- /dev/null +++ b/83m20b1b5/transformers/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:713d1d9eccb5392077dfde7ed54d01161741f0067b95af0299f6f9278ae2b6bf +size 249414221 diff --git a/83m20b20b/.gitattributes b/83m20b20b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..73abc9f745b5567fc3113b40b3da2f3085e17eb7 --- /dev/null +++ b/83m20b20b/.gitattributes @@ -0,0 +1 @@ +*/evaluation/generation/examples*.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/83m20b20b/2809865.err b/83m20b20b/2809865.err new file mode 100644 index 0000000000000000000000000000000000000000..d5c93e9650c2d29ab9aa18e699c60afc38d03241 --- /dev/null +++ b/83m20b20b/2809865.err @@ -0,0 +1,1117 @@ +2: 2023-02-05 01:56:45.553820: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-05 01:56:45.553830: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.553822: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.553840: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.553820: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: 2023-02-05 01:56:45.553865: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.554906: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.554925: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.554935: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: 2023-02-05 01:56:45.553884: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-05 01:56:45.553896: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.554565: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.554584: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.553895: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.553901: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.553905: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.554945: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.554933: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-05 01:56:45.553907: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.554600: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.554597: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.553911: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-05 01:56:45.553888: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.554965: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.554985: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-05 01:56:45.554971: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: 2023-02-05 01:56:45.553925: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-05 01:56:45.553931: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.554636: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.554660: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.554675: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-05 01:56:45.554714: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.556436: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.556440: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.556535: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.556525: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.556537: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.556540: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.556547: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:45.556607: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.557585: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.557615: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.557582: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: 2023-02-05 01:56:45.557929: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.557940: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.557946: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.557593: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.557627: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.557639: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.557655: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-05 01:56:45.557663: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.557956: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.557963: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.558016: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.558023: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-05 01:56:45.558016: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.559373: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.559389: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.559379: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.559404: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.559412: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.559409: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.559417: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-05 01:56:45.559429: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-05 01:56:57.485042: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-05 01:56:57.484773: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.485072: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-05 01:56:57.484802: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-02-05 01:56:57.485452: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.485385: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:56:57.485597: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-05 01:56:57.485608: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-05 01:56:57.485288: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-05 01:56:57.485528: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.485109: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:56:57.484834: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:56:57.485482: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.485404: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:56:57.485627: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-05 01:56:57.485628: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-05 01:56:57.485320: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-05 01:56:57.485565: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.485121: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:56:57.484850: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:56:57.485501: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.485424: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:56:57.485655: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-05 01:56:57.485662: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-05 01:56:57.485345: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-05 01:56:57.485555: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.485131: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:56:57.484868: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:56:57.485865: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:56:57.485512: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.485462: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:56:57.485685: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-05 01:56:57.485705: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-05 01:56:57.485342: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-05 01:56:57.485572: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.485155: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:56:57.484886: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:56:57.485883: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:56:57.485528: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.485456: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:56:57.485701: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-05 01:56:57.485668: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-05 01:56:57.485364: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-05 01:56:57.485603: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.485147: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:56:57.484897: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:56:57.485892: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:56:57.485537: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.485475: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:56:57.485715: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-05 01:56:57.485675: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-05 01:56:57.485369: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-05 01:56:57.485603: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.485101: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:56:57.484927: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:56:57.485542: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:56:57.485486: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:56:57.485707: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-05 01:56:57.485703: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-05 01:56:57.485369: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-05 01:56:57.485609: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.486221: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:56:57.486081: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:56:57.485538: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-02-05 01:56:57.486255: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:56:57.485467: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-05 01:56:57.486098: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.485672: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-05 01:56:57.485699: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-05 01:56:57.485382: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-05 01:56:57.485621: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:56:57.486280: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:56:57.486112: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:56:57.485914: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-05 01:56:57.485927: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-05 01:56:57.485933: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-05 01:56:57.485941: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:56:57.486388: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-05 01:56:57.486292: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:56:57.486265: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-05 01:56:57.486134: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-05 01:56:57.486138: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-05 01:56:57.486142: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-05 01:56:57.486144: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486370: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486385: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486343: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-05 01:56:57.486320: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-05 01:56:57.485949: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:56:57.486411: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:56:57.486434: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:56:57.486441: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-05 01:56:57.486330: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-05 01:56:57.486332: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-05 01:56:57.486340: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:56:57.486293: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:56:57.486303: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:56:57.486323: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-05 01:56:57.486156: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486401: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486426: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486421: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486421: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486438: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486360: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-05 01:56:57.486358: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-05 01:56:57.486375: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-05 01:56:57.486398: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-05 01:56:57.486416: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-05 01:56:57.486440: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:56:57.486456: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-05 01:56:57.486355: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:56:57.486337: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-05 01:56:57.486452: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486371: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486381: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-05 01:56:57.486449: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:56:57.486465: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:56:57.486469: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:56:57.486341: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486389: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-05 01:56:57.486456: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-05 01:56:57.486481: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:56:57.486346: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486401: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:56:57.486359: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486426: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-05 01:56:57.486428: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-05 01:57:23.192268: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.192300: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.192318: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.192330: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.192352: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.192362: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.192366: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.192384: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.200638: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.200661: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.200832: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-05 01:57:23.200672: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.200692: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.200705: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.200719: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.200733: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.201004: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-05 01:57:23.200739: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.201033: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-05 01:57:23.200861: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.201044: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-05 01:57:23.200858: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.201064: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-05 01:57:23.200891: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.201285: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-05 01:57:23.200899: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.201288: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: 2023-02-05 01:57:23.201287: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-05 01:57:23.201244: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-05 01:57:23.200909: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.201311: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-05 01:57:23.201385: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.200911: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.201271: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.201316: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.201318: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.201019: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.201418: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.201289: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.201333: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.201301: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-05 01:57:23.201429: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: 2023-02-05 01:57:23.201373: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.201312: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-05 01:57:23.201448: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: 2023-02-05 01:57:23.201386: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.201460: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: 2023-02-05 01:57:23.201399: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.201477: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: 2023-02-05 01:57:23.201420: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.201476: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: 2023-02-05 01:57:23.201463: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-05 01:57:23.201324: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.201549: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-05 01:57:23.201340: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.201348: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.221696: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.221696: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.221697: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.221701: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.221702: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.221703: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.221705: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.221705: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-05 01:57:23.221712: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.221713: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.221713: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.221722: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.221725: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.221723: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.221722: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-05 01:57:23.221726: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-05 01:57:23.227047: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.227080: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.227096: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.227134: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.227129: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.227133: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.227145: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.227159: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.230420: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.230426: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.230494: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-05 01:57:23.230426: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.230427: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.230495: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-05 01:57:23.230429: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.230436: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230495: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-05 01:57:23.230432: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.230496: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-05 01:57:23.230436: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.230436: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.230443: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-05 01:57:23.230443: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-05 01:57:23.230451: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-05 01:57:23.230450: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-05 01:57:23.230452: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230500: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-05 01:57:23.230457: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-05 01:57:23.230457: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.230499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.230499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.230501: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-05 01:57:23.230513: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230515: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230515: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230518: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230519: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230521: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230523: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-05 01:57:23.230524: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-05 01:57:23.230729: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-02-05 01:57:23.230792: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.230732: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.230793: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-05 01:57:23.230736: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.230797: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-05 01:57:23.230735: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.230802: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-05 01:57:23.230742: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-05 01:57:23.230896: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230934: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.230801: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.230736: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230936: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.230904: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-02-05 01:57:23.230805: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-05 01:57:23.230735: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.230809: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.230746: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-05 01:57:23.230746: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-05 01:57:23.230903: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-02-05 01:57:23.230809: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-05 01:57:23.230810: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-05 01:57:23.230750: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.230805: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-05 01:57:23.230744: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-05 01:57:23.230905: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-05 01:57:23.230758: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-05 01:57:23.230758: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.230808: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-05 01:57:23.230760: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-05 01:57:23.230758: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-05 01:57:23.230763: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-05 01:57:23.230908: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-05 01:57:23.230824: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-05 01:57:23.230823: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.230919: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-05 01:57:23.230824: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-05 01:57:23.230828: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-05 01:57:23.230829: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-05 01:57:23.230911: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.230909: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.231129: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-05 01:57:23.230907: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.230929: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-05 01:57:23.230930: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231131: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-05 01:57:23.230932: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-05 01:57:23.230932: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-05 01:57:23.230935: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-05 01:57:23.230936: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-05 01:57:23.230937: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231133: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.231134: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.231136: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.231136: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.231138: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.231140: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-05 01:57:23.231146: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231146: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231152: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231151: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231155: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231155: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231157: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-05 01:57:23.231158: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230933: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230936: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230940: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230940: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230940: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230942: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-05 01:57:23.230955: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-05 01:57:23.230955: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-05 01:57:23.230956: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-05 01:57:23.230958: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-05 01:57:23.230959: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-05 01:57:23.230959: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-05 01:57:23.230962: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-05 01:57:23.230963: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_upper_triang_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_upper_triang_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module fused_mix_prec_layer_norm_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module fused_mix_prec_layer_norm_cuda... +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: +0: +0: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: +1: +1: +1: +1: +1: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +2: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: +3: +3: +3: +3: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: +4: +4: +4: +4: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: +5: +5: +5: +5: +5: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: +6: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +0: Building extension module utils... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +7: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +2: Loading extension module utils... +7: Loading extension module utils... +2: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +2: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +2: Loading extension module utils... +3: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +3: Loading extension module utils... +2: Loading extension module utils... +4: Loading extension module utils... +2: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +5: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: +0: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +0: +0: +0: +0: Loading extension module utils...Loading extension module utils...Loading extension module utils...Loading extension module utils... +0: +0: +0: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...No modifications detected for re-loaded extension module utils, skipping build step... +0: +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +1: +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: +4: +4: +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +6: +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Loading extension module utils... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +4: +4: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +3: +3: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +7: +7: Loading extension module utils...Loading extension module utils... +7: +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +2: +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +2: +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/utils.py:349: UserWarning: Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings +0: warnings.warn("Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings") diff --git a/83m20b20b/2809865.out b/83m20b20b/2809865.out new file mode 100644 index 0000000000000000000000000000000000000000..3812132e1aef2969ca83b627ca509bff2eebcafb --- /dev/null +++ b/83m20b20b/2809865.out @@ -0,0 +1,4395 @@ +Model parameters: d_model 640 ffw_size 2560 kv_size 64 n_heads 10 n_layers 10 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 1 --pipeline-model-parallel-size 1 --num-layers 10 --hidden-size 640 --num-attention-heads 10 --kv-channels 64 --ffn-hidden-size 2560 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 4 --global-batch-size 256 --train-samples 1 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --loss-scale 12 --clip-grad 1.0 --kill-switch-path kill-switch-83m20b20bval --bf16 --checkpoint-activations --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 1 --lr-warmup-samples 0 --clip-grad 1.0 --weight-decay 1e-1 --no-load-optim --reset-progress --override-lr-scheduler --log-interval 10 --save-interval 1000 --eval-interval 1 --eval-iters 100 --eval-only true --tensorboard-dir tensorboard_83m20b20bval --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save checkpoints_83m20b20b --load checkpoints_83m20b20b --train-weighted-split-paths-path train20b.txt --valid-weighted-split-paths-path val.txt --data-impl mmap --deepspeed --deepspeed_config ds_configs/2809865.json --zero-stage 0 +START 2809865: Sun Feb 5 01:55:02 EET 2023 +0: +0: +0: ======================= ROCm System Management Interface ======================= +0: ================================= Concise Info ================================= +0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +0: 0 42.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 1 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 2 40.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 4 41.0c 97.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 6 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: ================================================================================ +0: ============================= End of ROCm SMI Log ============================== +4: +4: +4: ======================= ROCm System Management Interface ======================= +4: ================================= Concise Info ================================= +4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +4: 0 40.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 1 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 2 38.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 4 39.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 6 45.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: ================================================================================ +4: ============================= End of ROCm SMI Log ============================== +1: +1: +1: ======================= ROCm System Management Interface ======================= +1: ================================= Concise Info ================================= +1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +1: 0 45.0c 100.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 1 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 2 38.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 4 40.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 6 38.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: ================================================================================ +1: ============================= End of ROCm SMI Log ============================== +5: +5: +5: ======================= ROCm System Management Interface ======================= +5: ================================= Concise Info ================================= +5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +5: 0 46.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 2 39.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 4 44.0c 98.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 6 43.0c 80.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: ================================================================================ +5: ============================= End of ROCm SMI Log ============================== +2: +2: +2: ======================= ROCm System Management Interface ======================= +2: ================================= Concise Info ================================= +2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +2: 0 47.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 2 38.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 4 36.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 5 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 6 38.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 7 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: ================================================================================ +2: ============================= End of ROCm SMI Log ============================== +6: +6: +6: ======================= ROCm System Management Interface ======================= +6: ================================= Concise Info ================================= +6: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +6: 0 40.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 2 41.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 4 42.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 6 43.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: ================================================================================ +6: ============================= End of ROCm SMI Log ============================== +3: +3: +3: ======================= ROCm System Management Interface ======================= +3: ================================= Concise Info ================================= +3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +3: 0 44.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 2 41.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 4 46.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 6 42.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: ================================================================================ +3: ============================= End of ROCm SMI Log ============================== +7: +7: +7: ======================= ROCm System Management Interface ======================= +7: ================================= Concise Info ================================= +7: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +7: 0 39.0c 97.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 2 42.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 4 43.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 6 39.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: ================================================================================ +7: ============================= End of ROCm SMI Log ============================== +2: Launching on nid005492 (2/8), master nid005490 port 9999, GPUs 8, CUDA: True +5: Launching on nid005495 (5/8), master nid005490 port 9999, GPUs 8, CUDA: True +0: Launching on nid005490 (0/8), master nid005490 port 9999, GPUs 8, CUDA: True +6: Launching on nid005496 (6/8), master nid005490 port 9999, GPUs 8, CUDA: True +4: Launching on nid005494 (4/8), master nid005490 port 9999, GPUs 8, CUDA: True +1: Launching on nid005491 (1/8), master nid005490 port 9999, GPUs 8, CUDA: True +3: Launching on nid005493 (3/8), master nid005490 port 9999, GPUs 8, CUDA: True +7: Launching on nid005497 (7/8), master nid005490 port 9999, GPUs 8, CUDA: True +0: using world size: 64, data-parallel-size: 64, tensor-model-parallel size: 1, pipeline-model-parallel size: 1 +0: accumulate and all-reduce gradients in fp32 for bfloat16 data type. +0: using torch.bfloat16 for parameters ... +0: ------------------------ arguments ------------------------ +0: abort_on_unmet_fused_kernel_constraints ......... False +0: accumulate_allreduce_grads_in_fp32 .............. True +0: adam_beta1 ...................................... 0.9 +0: adam_beta2 ...................................... 0.999 +0: adam_eps ........................................ 1e-08 +0: adlr_autoresume ................................. False +0: adlr_autoresume_interval ........................ 1000 +0: apply_query_key_layer_scaling ................... True +0: apply_residual_connection_post_layernorm ........ False +0: attention_dropout ............................... 0.1 +0: attention_softmax_in_fp32 ....................... False +0: bert_binary_head ................................ True +0: bert_load ....................................... None +0: bf16 ............................................ True +0: bias_dropout_fusion ............................. True +0: bias_gelu_fusion ................................ True +0: biencoder_projection_dim ........................ 0 +0: biencoder_shared_query_context_model ............ False +0: block_data_path ................................. None +0: checkpoint_activations .......................... True +0: checkpoint_in_cpu ............................... False +0: checkpoint_num_layers ........................... 1 +0: clip_grad ....................................... 1.0 +0: codecarbon_dir .................................. None +0: consumed_train_samples .......................... 0 +0: consumed_train_tokens ........................... 0 +0: consumed_valid_samples .......................... 0 +0: contigious_checkpointing ........................ False +0: cpu_optimizer ................................... False +0: cpu_torch_adam .................................. False +0: curriculum_learning ............................. False +0: data_impl ....................................... mmap +0: data_parallel_size .............................. 64 +0: data_path ....................................... None +0: dataloader_type ................................. single +0: DDP_impl ........................................ local +0: decoder_seq_length .............................. None +0: deepscale ....................................... False +0: deepscale_config ................................ None +0: deepspeed ....................................... True +0: deepspeed_activation_checkpointing .............. False +0: deepspeed_config ................................ ds_configs/2809865.json +0: deepspeed_mpi ................................... False +0: distribute_checkpointed_activations ............. False +0: distributed_backend ............................. nccl +0: embed_layernorm ................................. False +0: embedding_path .................................. None +0: encoder_seq_length .............................. 2048 +0: eod_mask_loss ................................... False +0: eval_interval ................................... 1 +0: eval_iters ...................................... 100 +0: eval_only ....................................... True +0: evidence_data_path .............................. None +0: exit_duration_in_mins ........................... None +0: exit_interval ................................... None +0: ffn_hidden_size ................................. 2560 +0: finetune ........................................ False +0: fp16 ............................................ False +0: fp16_lm_cross_entropy ........................... False +0: fp32_residual_connection ........................ False +0: gigaflos_no_embeds .............................. 0 +0: global_batch_size ............................... 256 +0: glu_activation .................................. None +0: hidden_dropout .................................. 0.1 +0: hidden_size ..................................... 640 +0: hysteresis ...................................... 2 +0: ict_head_size ................................... None +0: ict_load ........................................ None +0: img_dim ......................................... 224 +0: indexer_batch_size .............................. 128 +0: indexer_log_interval ............................ 1000 +0: inference ....................................... False +0: init_method_std ................................. 0.02 +0: init_method_xavier_uniform ...................... False +0: initial_loss_scale .............................. 4294967296 +0: kill_switch_path ................................ kill-switch-83m20b20bval +0: kv_channels ..................................... 64 +0: layer_norm_fusion ............................... True +0: layernorm_epsilon ............................... 1e-05 +0: lazy_mpu_init ................................... None +0: load ............................................ checkpoints_83m20b20b +0: local_rank ...................................... None +0: log_batch_size_to_tensorboard ................... True +0: log_interval .................................... 10 +0: log_learning_rate_to_tensorboard ................ True +0: log_level ....................................... None +0: log_level_replica ............................... None +0: log_loss_scale_to_tensorboard ................... True +0: log_num_zeros_in_grad ........................... False +0: log_params_norm ................................. False +0: log_path ........................................ None +0: log_timers_to_tensorboard ....................... True +0: log_validation_ppl_to_tensorboard ............... True +0: loss_on_targets_only ............................ False +0: loss_scale ...................................... 12.0 +0: loss_scale_window ............................... 1000 +0: lr .............................................. 0.0002 +0: lr_decay_iters .................................. None +0: lr_decay_samples ................................ 1 +0: lr_decay_style .................................. cosine +0: lr_decay_tokens ................................. None +0: lr_warmup_fraction .............................. None +0: lr_warmup_iters ................................. 0 +0: lr_warmup_samples ............................... 0 +0: make_vocab_size_divisible_by .................... 128 +0: mask_prob ....................................... 0.15 +0: masked_softmax_fusion ........................... True +0: max_position_embeddings ......................... 2048 +0: mean_noise_span_length .......................... None +0: memory_centric_tiled_linear ..................... False +0: merge_file ...................................... gpt2/merges.txt +0: micro_batch_size ................................ 4 +0: min_loss_scale .................................. 1.0 +0: min_lr .......................................... 2e-05 +0: mmap_warmup ..................................... False +0: no_load_optim ................................... True +0: no_load_rng ..................................... None +0: no_save_optim ................................... None +0: no_save_rng ..................................... None +0: noise_density ................................... None +0: num_attention_heads ............................. 10 +0: num_channels .................................... 3 +0: num_classes ..................................... 1000 +0: num_layers ...................................... 10 +0: num_layers_per_virtual_pipeline_stage ........... None +0: num_workers ..................................... 2 +0: onnx_safe ....................................... None +0: openai_gelu ..................................... False +0: optimizer ....................................... adam +0: optimizer_fusion ................................ True +0: override_lr_scheduler ........................... True +0: pad_vocab_size_to ............................... None +0: params_dtype .................................... torch.bfloat16 +0: partition_activations ........................... False +0: patch_dim ....................................... 16 +0: pipeline_model_parallel_size .................... 1 +0: position_embedding_type ......................... PositionEmbeddingType.absolute +0: pp_partition_method ............................. None +0: profile_backward ................................ False +0: query_in_block_prob ............................. 0.1 +0: rampup_batch_size ............................... None +0: rank ............................................ 0 +0: remote_device ................................... none +0: reset_attention_mask ............................ False +0: reset_position_ids .............................. False +0: reset_progress .................................. True +0: retriever_report_topk_accuracies ................ [] +0: retriever_score_scaling ......................... False +0: retriever_seq_length ............................ 256 +0: reweight_loss_based_on_position_frequency ....... False +0: sample_rate ..................................... 1.0 +0: save ............................................ checkpoints_83m20b20b +0: save_interval ................................... 1000 +0: scatter_gather_tensors_in_pipeline .............. True +0: scattered_embeddings ............................ False +0: seed ............................................ 1234 +0: seq_length ...................................... 2048 +0: sgd_momentum .................................... 0.9 +0: short_seq_prob .................................. 0.1 +0: skip_train_iteration_range ...................... None +0: split ........................................... None +0: split_transformers .............................. False +0: sync_tp_duplicated_parameters ................... False +0: synchronize_each_layer .......................... False +0: tensor_model_parallel_size ...................... 1 +0: tensorboard_dir ................................. tensorboard_83m20b20bval +0: tensorboard_log_interval ........................ 1 +0: tensorboard_queue_size .......................... 5 +0: test_weighted_split_paths ....................... None +0: test_weighted_split_paths_path .................. None +0: tile_factor ..................................... 1 +0: titles_data_path ................................ None +0: tokenizer_name_or_path .......................... None +0: tokenizer_type .................................. GPT2BPETokenizer +0: train_iters ..................................... None +0: train_samples ................................... 1 +0: train_tokens .................................... None +0: train_weighted_split_names ...................... ['train'] +0: train_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document']] +0: train_weighted_split_paths_path ................. None +0: train_weighted_split_splits ..................... [['0:1']] +0: train_weighted_split_weights .................... [['1.0']] +0: universal_checkpoint ............................ False +0: use_bnb_optimizer ............................... False +0: use_checkpoint_lr_scheduler ..................... False +0: use_contiguous_buffers_in_ddp ................... True +0: use_cpu_initialization .......................... None +0: use_one_sent_docs ............................... False +0: use_pin_memory .................................. False +0: valid_num_workers ............................... 2 +0: valid_weighted_split_names ...................... ['validation'] +0: valid_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document']] +0: valid_weighted_split_paths_path ................. None +0: valid_weighted_split_splits ..................... [['0:1']] +0: valid_weighted_split_weights .................... [['1.0']] +0: virtual_pipeline_model_parallel_size ............ None +0: vocab_extra_ids ................................. 0 +0: vocab_file ...................................... gpt2/vocab.json +0: weight_decay .................................... 0.1 +0: world_size ...................................... 64 +0: zero_allgather_bucket_size ...................... 0.0 +0: zero_contigious_gradients ....................... False +0: zero_reduce_bucket_size ......................... 0.0 +0: zero_reduce_scatter ............................. False +0: zero_stage ...................................... 0 +0: -------------------- end of arguments --------------------- +0: setting number of micro-batches to constant 1 +0: > building GPT2BPETokenizer tokenizer ... +0: > padded vocab (size: 50257) with 47 dummy tokens (new size: 50304) +0: DeepSpeed general environment info: +0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] +0: torch version .................... 1.13.0+rocm5.2 +0: torch cuda version ............... None +0: torch hip version ................ 5.2.21151-afdc89f8 +0: nvcc version ..................... None +0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] +0: deepspeed info ................... 0.7.5, unknown, unknown +0: deepspeed wheel compiled w. ...... torch 1.13, hip 5.1 +0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** +0: > initializing torch distributed ... +0: [2023-02-05 01:58:41,690] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +7: > setting tensorboard ... +0: > initializing tensor model parallel with size 1 +0: > initializing pipeline model parallel with size 1 +0: > setting random seeds to 1234 ... +0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 +0: > compiling dataset index builder ... +0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: make: Nothing to be done for 'default'. +0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: >>> done with dataset index builder. Compilation time: 0.107 seconds +0: > compiling and loading fused kernels ... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 87 +0: ninja: no work to do. +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 63 +0: [1/1] c++ scaled_masked_softmax_hip.cuda.o scaled_masked_softmax_hip.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib -lamdhip64 -o scaled_masked_softmax_cuda.so +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 67 +0: [1/1] c++ layer_norm_cuda.o layer_norm_hip_kernel.cuda.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib -lamdhip64 -o fused_mix_prec_layer_norm_cuda.so +0: >>> done with compiling and loading fused kernels. Compilation time: 22.567 seconds +0: time to initialize megatron (seconds): 75.149 +0: [after megatron is initialized] datetime: 2023-02-05 01:59:07 +0: building GPT model ... +0: [2023-02-05 01:59:07,277] [INFO] [utils.py:827:see_memory_usage] Before Building Model +0: [2023-02-05 01:59:07,278] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB +0: [2023-02-05 01:59:07,278] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 50.86 GB, percent = 10.1% +0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None +0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=1, model=0): 1, ProcessCoord(pipe=0, data=2, model=0): 2, ProcessCoord(pipe=0, data=3, model=0): 3, ProcessCoord(pipe=0, data=4, model=0): 4, ProcessCoord(pipe=0, data=5, model=0): 5, ProcessCoord(pipe=0, data=6, model=0): 6, ProcessCoord(pipe=0, data=7, model=0): 7, ProcessCoord(pipe=0, data=8, model=0): 8, ProcessCoord(pipe=0, data=9, model=0): 9, ProcessCoord(pipe=0, data=10, model=0): 10, ProcessCoord(pipe=0, data=11, model=0): 11, ProcessCoord(pipe=0, data=12, model=0): 12, ProcessCoord(pipe=0, data=13, model=0): 13, ProcessCoord(pipe=0, data=14, model=0): 14, ProcessCoord(pipe=0, data=15, model=0): 15, ProcessCoord(pipe=0, data=16, model=0): 16, ProcessCoord(pipe=0, data=17, model=0): 17, ProcessCoord(pipe=0, data=18, model=0): 18, ProcessCoord(pipe=0, data=19, model=0): 19, ProcessCoord(pipe=0, data=20, model=0): 20, ProcessCoord(pipe=0, data=21, model=0): 21, ProcessCoord(pipe=0, data=22, model=0): 22, ProcessCoord(pi +0: pe=0, data=23, model=0): 23, ProcessCoord(pipe=0, data=24, model=0): 24, ProcessCoord(pipe=0, data=25, model=0): 25, ProcessCoord(pipe=0, data=26, model=0): 26, ProcessCoord(pipe=0, data=27, model=0): 27, ProcessCoord(pipe=0, data=28, model=0): 28, ProcessCoord(pipe=0, data=29, model=0): 29, ProcessCoord(pipe=0, data=30, model=0): 30, ProcessCoord(pipe=0, data=31, model=0): 31, ProcessCoord(pipe=0, data=32, model=0): 32, ProcessCoord(pipe=0, data=33, model=0): 33, ProcessCoord(pipe=0, data=34, model=0): 34, ProcessCoord(pipe=0, data=35, model=0): 35, ProcessCoord(pipe=0, data=36, model=0): 36, ProcessCoord(pipe=0, data=37, model=0): 37, ProcessCoord(pipe=0, data=38, model=0): 38, ProcessCoord(pipe=0, data=39, model=0): 39, ProcessCoord(pipe=0, data=40, model=0): 40, ProcessCoord(pipe=0, data=41, model=0): 41, ProcessCoord(pipe=0, data=42, model=0): 42, ProcessCoord(pipe=0, data=43, model=0): 43, ProcessCoord(pipe=0, data=44, model=0): 44, ProcessCoord(pipe=0, data=45, model=0): 45, ProcessCoord(pipe=0, data=4 +0: 6, model=0): 46, ProcessCoord(pipe=0, data=47, model=0): 47, ProcessCoord(pipe=0, data=48, model=0): 48, ProcessCoord(pipe=0, data=49, model=0): 49, ProcessCoord(pipe=0, data=50, model=0): 50, ProcessCoord(pipe=0, data=51, model=0): 51, ProcessCoord(pipe=0, data=52, model=0): 52, ProcessCoord(pipe=0, data=53, model=0): 53, ProcessCoord(pipe=0, data=54, model=0): 54, ProcessCoord(pipe=0, data=55, model=0): 55, ProcessCoord(pipe=0, data=56, model=0): 56, ProcessCoord(pipe=0, data=57, model=0): 57, ProcessCoord(pipe=0, data=58, model=0): 58, ProcessCoord(pipe=0, data=59, model=0): 59, ProcessCoord(pipe=0, data=60, model=0): 60, ProcessCoord(pipe=0, data=61, model=0): 61, ProcessCoord(pipe=0, data=62, model=0): 62, ProcessCoord(pipe=0, data=63, model=0): 63} +0: [2023-02-05 01:59:09,301] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer +0: stage=0 layers=17 +0: 0: _to_float16 +0: 1: EmbeddingPipe +0: 2: +0: 3: ParallelTransformerLayerPipe +0: 4: ParallelTransformerLayerPipe +0: 5: ParallelTransformerLayerPipe +0: 6: ParallelTransformerLayerPipe +0: 7: ParallelTransformerLayerPipe +0: 8: ParallelTransformerLayerPipe +0: 9: ParallelTransformerLayerPipe +0: 10: ParallelTransformerLayerPipe +0: 11: ParallelTransformerLayerPipe +0: 12: ParallelTransformerLayerPipe +0: 13: undo +0: 14: MixedFusedLayerNorm +0: 15: EmbeddingPipe +0: 16: float16_to_fp32 +0: loss: CrossEntropy +0: [2023-02-05 01:59:09,774] [INFO] [utils.py:827:see_memory_usage] After Building Model +0: [2023-02-05 01:59:09,775] [INFO] [utils.py:828:see_memory_usage] MA 0.16 GB Max_MA 0.16 GB CA 0.17 GB Max_CA 0 GB +0: [2023-02-05 01:59:09,775] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 50.87 GB, percent = 10.1% +0: setting training iterations to 0 +0: > learning rate decay style: cosine +0: DeepSpeed is enabled. +0: [2023-02-05 01:59:09,776] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown +0: [2023-02-05 01:59:21,582] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +0: [2023-02-05 01:59:21,583] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer +0: [2023-02-05 01:59:21,583] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer +0: [2023-02-05 01:59:21,585] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +0: [2023-02-05 01:59:21,585] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer +0: [2023-02-05 01:59:21,695] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer +0: [2023-02-05 01:59:21,696] [INFO] [utils.py:828:see_memory_usage] MA 0.16 GB Max_MA 0.16 GB CA 0.17 GB Max_CA 0 GB +0: [2023-02-05 01:59:21,696] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 51.55 GB, percent = 10.2% +0: ninja: no work to do. +0: Time to load utils op: 0.31430530548095703 seconds +0: Time to load utils op: 0.23408937454223633 seconds +0: Time to load utils op: 0.3144571781158447 secondsTime to load utils op: 0.314131498336792 seconds +0: +0: Time to load utils op: 0.3145782947540283 seconds +0: Time to load utils op: 0.31420373916625977 secondsTime to load utils op: 0.31415271759033203 seconds +0: +0: Time to load utils op: 0.3145427703857422 seconds +1: Time to load utils op: 0.31170153617858887 seconds +1: Time to load utils op: 0.31171274185180664 seconds +1: Time to load utils op: 0.31173253059387207 seconds +1: Time to load utils op: 0.31174230575561523 secondsTime to load utils op: 0.31174707412719727 seconds +1: +1: Time to load utils op: 0.3117251396179199 secondsTime to load utils op: 0.311751127243042 secondsTime to load utils op: 0.3117492198944092 seconds +1: +1: +7: Time to load utils op: 0.3088490962982178 seconds +7: Time to load utils op: 0.3150753974914551 seconds +7: Time to load utils op: 0.3086874485015869 seconds +7: Time to load utils op: 0.3088853359222412 seconds +7: Time to load utils op: 0.30868053436279297 secondsTime to load utils op: 0.30872249603271484 secondsTime to load utils op: 0.30904531478881836 seconds +7: +7: +7: Time to load utils op: 0.309039831161499 seconds +3: Time to load utils op: 0.3109261989593506 seconds +3: Time to load utils op: 0.3109421730041504 seconds +3: Time to load utils op: 0.3109426498413086 secondsTime to load utils op: 0.3109583854675293 seconds +3: +3: Time to load utils op: 0.3109619617462158 seconds +3: Time to load utils op: 0.31096553802490234 secondsTime to load utils op: 0.31096816062927246 seconds +3: +3: Time to load utils op: 0.3109700679779053 seconds +2: Time to load utils op: 0.3134269714355469 seconds +2: Time to load utils op: 0.3134341239929199 seconds +2: Time to load utils op: 0.31343984603881836 seconds +2: Time to load utils op: 0.3134582042694092 secondsTime to load utils op: 0.3134603500366211 secondsTime to load utils op: 0.3134646415710449 seconds +2: +2: +2: Time to load utils op: 0.3134737014770508 seconds +2: Time to load utils op: 0.3134877681732178 seconds +4: Time to load utils op: 0.3117845058441162 seconds +4: Time to load utils op: 0.3118009567260742 seconds +4: Time to load utils op: 0.311800479888916 seconds +4: Time to load utils op: 0.31180334091186523 seconds +4: Time to load utils op: 0.31182169914245605 seconds +4: Time to load utils op: 0.3118295669555664 seconds +4: Time to load utils op: 0.3118319511413574 secondsTime to load utils op: 0.31183481216430664 seconds +4: +5: Time to load utils op: 0.31117796897888184 seconds +5: Time to load utils op: 0.3112199306488037 seconds +5: Time to load utils op: 0.31124424934387207 seconds +5: Time to load utils op: 0.31125855445861816 seconds +5: Time to load utils op: 0.31126928329467773 seconds +5: Time to load utils op: 0.3112759590148926 secondsTime to load utils op: 0.31126976013183594 seconds +5: +5: Time to load utils op: 0.3112814426422119 seconds +6: Time to load utils op: 0.31067752838134766 seconds +6: Time to load utils op: 0.3106844425201416 seconds +6: Time to load utils op: 0.31069159507751465 secondsTime to load utils op: 0.31068921089172363 seconds +6: +6: Time to load utils op: 0.3106977939605713 seconds +6: Time to load utils op: 0.3107318878173828 seconds +6: Time to load utils op: 0.31072545051574707 seconds +6: Time to load utils op: 0.31072497367858887 seconds +0: [2023-02-05 01:59:22,031] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 +0: [2023-02-05 01:59:22,032] [INFO] [utils.py:828:see_memory_usage] MA 0.16 GB Max_MA 0.16 GB CA 0.17 GB Max_CA 0 GB +0: [2023-02-05 01:59:22,032] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 51.55 GB, percent = 10.2% +0: Time to load utils op: 0.0004782676696777344 seconds +0: Time to load utils op: 0.00046062469482421875 secondsTime to load utils op: 0.0004715919494628906 secondsTime to load utils op: 0.0004646778106689453 secondsTime to load utils op: 0.0004534721374511719 seconds +0: +0: +0: +0: Time to load utils op: 0.0005726814270019531 seconds +0: Time to load utils op: 0.0005056858062744141 seconds +1: Time to load utils op: 0.0009944438934326172 seconds +1: Time to load utils op: 0.0011942386627197266 seconds +1: Time to load utils op: 0.0012617111206054688 seconds +1: Time to load utils op: 0.0012090206146240234 seconds +1: Time to load utils op: 0.0012412071228027344 seconds +1: Time to load utils op: 0.001333475112915039 seconds +1: Time to load utils op: 0.0012972354888916016 seconds +1: Time to load utils op: 0.0013041496276855469 seconds +6: Time to load utils op: 0.0009944438934326172 seconds +5: Time to load utils op: 0.0006415843963623047 seconds +6: Time to load utils op: 0.0011887550354003906 seconds +6: Time to load utils op: 0.001176595687866211 seconds +6: Time to load utils op: 0.00119781494140625 seconds +6: Time to load utils op: 0.0012407302856445312 seconds +6: Time to load utils op: 0.001256704330444336 seconds +6: Time to load utils op: 0.001237630844116211 seconds +6: Time to load utils op: 0.001276254653930664 seconds +5: Time to load utils op: 0.0010187625885009766 seconds +5: Time to load utils op: 0.0010082721710205078 seconds +5: Time to load utils op: 0.0010280609130859375 seconds +5: Time to load utils op: 0.0012249946594238281 seconds +5: Time to load utils op: 0.001277923583984375 seconds +5: Time to load utils op: 0.0012135505676269531 seconds +5: Time to load utils op: 0.0012848377227783203 seconds +4: Time to load utils op: 0.0010151863098144531 seconds +4: Time to load utils op: 0.0011289119720458984 seconds +3: Time to load utils op: 0.0008859634399414062 seconds +4: Time to load utils op: 0.0012478828430175781 seconds +4: Time to load utils op: 0.0012063980102539062 seconds +4: Time to load utils op: 0.0012519359588623047 seconds +4: Time to load utils op: 0.001214742660522461 seconds +4: Time to load utils op: 0.0012295246124267578 seconds +4: Time to load utils op: 0.0012798309326171875 seconds +3: Time to load utils op: 0.0011067390441894531 secondsTime to load utils op: 0.0010924339294433594 seconds +3: +3: Time to load utils op: 0.0011146068572998047 seconds +3: Time to load utils op: 0.0011053085327148438 seconds +3: Time to load utils op: 0.0011124610900878906 seconds +3: Time to load utils op: 0.001116037368774414 seconds +3: Time to load utils op: 0.0011353492736816406 seconds +7: Time to load utils op: 0.0005435943603515625 seconds +7: Time to load utils op: 0.0005490779876708984 seconds +7: Time to load utils op: 0.000568389892578125 seconds +7: Time to load utils op: 0.0005970001220703125 secondsTime to load utils op: 0.0005624294281005859 seconds +7: +7: Time to load utils op: 0.00057220458984375 seconds +7: Time to load utils op: 0.0005724430084228516 seconds +7: Time to load utils op: 0.0006363391876220703 seconds +2: Time to load utils op: 0.001171112060546875 seconds +2: Time to load utils op: 0.0013573169708251953 seconds +2: Time to load utils op: 0.0014357566833496094 seconds +2: Time to load utils op: 0.0014278888702392578 seconds +2: Time to load utils op: 0.0014491081237792969 seconds +2: Time to load utils op: 0.0014345645904541016 seconds +2: Time to load utils op: 0.0014581680297851562 seconds +2: Time to load utils op: 0.0014705657958984375 seconds +0: [2023-02-05 01:59:22,151] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 +0: [2023-02-05 01:59:22,152] [INFO] [utils.py:828:see_memory_usage] MA 0.37 GB Max_MA 0.37 GB CA 0.48 GB Max_CA 0 GB +0: [2023-02-05 01:59:22,152] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 51.69 GB, percent = 10.3% +0: [2023-02-05 01:59:22,247] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 +0: [2023-02-05 01:59:22,248] [INFO] [utils.py:828:see_memory_usage] MA 0.37 GB Max_MA 0.37 GB CA 0.48 GB Max_CA 0 GB +0: [2023-02-05 01:59:22,248] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 51.69 GB, percent = 10.3% +0: [2023-02-05 01:59:22,342] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 +0: [2023-02-05 01:59:22,343] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-05 01:59:22,343] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 51.69 GB, percent = 10.3% +0: [2023-02-05 01:59:22,436] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 +0: [2023-02-05 01:59:22,436] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-05 01:59:22,437] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 51.69 GB, percent = 10.3% +0: [2023-02-05 01:59:22,531] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 +0: [2023-02-05 01:59:22,532] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-05 01:59:22,532] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 51.69 GB, percent = 10.3% +0: [2023-02-05 01:59:22,624] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer +0: [2023-02-05 01:59:22,625] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-05 01:59:22,625] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 51.69 GB, percent = 10.3% +0: [2023-02-05 01:59:22,724] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer +0: [2023-02-05 01:59:22,724] [INFO] [utils.py:828:see_memory_usage] MA 0.48 GB Max_MA 0.48 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-05 01:59:22,724] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 51.69 GB, percent = 10.3% +0: [2023-02-05 01:59:22,817] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer +0: [2023-02-05 01:59:22,817] [INFO] [utils.py:828:see_memory_usage] MA 0.48 GB Max_MA 0.48 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-05 01:59:22,817] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 51.69 GB, percent = 10.3% +0: [2023-02-05 01:59:22,817] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +0: [2023-02-05 01:59:22,818] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler +0: [2023-02-05 01:59:22,818] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = +0: [2023-02-05 01:59:22,818] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0002, 0.0002, 0.0002], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +0: [2023-02-05 01:59:22,818] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: +0: [2023-02-05 01:59:22,818] [INFO] [config.py:1011:print] activation_checkpointing_config { +0: "partition_activations": false, +0: "contiguous_memory_optimization": false, +0: "cpu_checkpointing": false, +0: "number_checkpoints": null, +0: "synchronize_checkpoint_boundary": false, +0: "profile": false +0: } +0: [2023-02-05 01:59:22,818] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +0: [2023-02-05 01:59:22,818] [INFO] [config.py:1011:print] amp_enabled .................. False +0: [2023-02-05 01:59:22,818] [INFO] [config.py:1011:print] amp_params ................... False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] autotuning_config ............ { +0: "enabled": false, +0: "start_step": null, +0: "end_step": null, +0: "metric_path": null, +0: "arg_mappings": null, +0: "metric": "throughput", +0: "model_info": null, +0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", +0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", +0: "overwrite": true, +0: "fast": true, +0: "start_profile_step": 3, +0: "end_profile_step": 5, +0: "tuner_type": "gridsearch", +0: "tuner_early_stopping": 5, +0: "tuner_num_trials": 50, +0: "model_info_path": null, +0: "mp_size": 1, +0: "max_train_batch_size": null, +0: "min_train_batch_size": 1, +0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, +0: "min_train_micro_batch_size_per_gpu": 1, +0: "num_tuning_micro_batch_sizes": 3 +0: } +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] bfloat16_enabled ............. True +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] comms_config ................. +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] communication_data_type ...... None +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa +0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] curriculum_enabled ........... False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] curriculum_params ............ False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] dataloader_drop_last ......... False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] disable_allgather ............ False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] dump_state ................... False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ bert.encoder.layer +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] elasticity_enabled ........... False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] flops_profiler_config ........ { +0: "enabled": false, +0: "profile_step": 1, +0: "module_depth": -1, +0: "top_modules": 1, +0: "detailed": true, +0: "output_file": null +0: } +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] fp16_auto_cast ............... None +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] fp16_enabled ................. False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] global_rank .................. 0 +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 1 +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] load_universal_checkpoint .... False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] loss_scale ................... 1.0 +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] memory_breakdown ............. False +0: [2023-02-05 01:59:22,819] [INFO] [config.py:1011:print] monitor_config ............... +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] nebula_config ................ { +0: "enabled": false, +0: "persistent_storage_path": null, +0: "persistent_time_interval": 100, +0: "num_of_version_in_retention": 2, +0: "enable_nebula_load": true, +0: "load_path": null +0: } +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] optimizer_name ............... None +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] optimizer_params ............. None +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] pld_enabled .................. False +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] pld_params ................... False +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] prescale_gradients ........... False +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] scheduler_name ............... None +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] scheduler_params ............. None +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] sparse_attention ............. None +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] steps_per_print .............. 2000 +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] train_batch_size ............. 256 +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 4 +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] use_node_local_storage ....... False +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] wall_clock_breakdown ......... False +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] world_size ................... 64 +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] zero_enabled ................. False +0: [2023-02-05 01:59:22,820] [INFO] [config.py:1011:print] zero_optimization_stage ...... 0 +0: [2023-02-05 01:59:22,820] [INFO] [config.py:996:print_user_config] json = { +0: "train_micro_batch_size_per_gpu": 4, +0: "train_batch_size": 256, +0: "gradient_clipping": 1.0, +0: "zero_optimization": { +0: "stage": 0 +0: }, +0: "bf16": { +0: "enabled": true +0: }, +0: "steps_per_print": 2.000000e+03, +0: "wall_clock_breakdown": false +0: } +0: Time to load utils op: 0.0006113052368164062 seconds +0: [2023-02-05 01:59:22,821] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=1 micro_batch_size=4 +0: [2023-02-05 01:59:22,881] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=17 [0, 17) STAGE_PARAMS=82741760 (82.742M) TOTAL_PARAMS=82741760 (82.742M) UNIQUE_PARAMS=82741760 (82.742M) +0: [2023-02-05 01:59:22,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-05 01:59:22,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:22,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:22,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:22,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-05 01:59:22,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:22,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:22,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:22,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:22,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:22,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-05 01:59:22,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:22,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:22,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:22,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:22,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:22,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:22,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:22,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:22,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:23,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:23,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:23,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:23,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:23,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:23,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:23,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:23,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:23,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:23,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:23,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:23,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:23,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:23,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:23,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-05 01:59:23,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-05 01:59:23,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:23,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:23,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:23,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:23,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:23,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:23,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:23,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:23,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:23,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:23,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:23,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:23,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:23,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:23,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-05 01:59:23,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-05 01:59:23,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:23,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-05 01:59:23,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-05 01:59:23,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:23,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-05 01:59:23,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-05 01:59:23,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:23,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:23,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:23,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:23,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:23,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:23,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:23,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-05 01:59:23,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:23,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:23,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:23,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:23,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-05 01:59:23,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:23,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:23,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:23,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:23,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:23,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:23,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-05 01:59:23,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:23,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-05 01:59:23,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:23,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:23,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:23,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:23,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:23,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-05 01:59:23,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:23,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:23,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:23,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-05 01:59:23,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-05 01:59:23,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-05 01:59:23,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:23,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:23,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:23,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:23,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:23,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:23,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-05 01:59:23,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-05 01:59:23,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:23,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:23,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:23,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:23,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:23,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-05 01:59:23,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:23,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-05 01:59:23,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:23,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:23,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:23,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:23,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:23,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-05 01:59:23,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:23,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:23,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:23,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:23,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:23,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-05 01:59:23,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-05 01:59:23,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:23,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-05 01:59:23,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:23,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:23,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-05 01:59:23,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:23,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:23,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:23,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:23,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:23,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:23,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:23,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:23,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-05 01:59:23,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-05 01:59:23,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:23,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:23,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-05 01:59:23,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-05 01:59:23,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:23,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-05 01:59:23,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:23,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:23,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:23,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:23,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:23,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,312] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,312] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:23,312] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:23,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:23,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:23,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:23,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:23,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:23,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-05 01:59:23,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:23,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:23,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-05 01:59:23,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-05 01:59:23,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:23,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:23,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:23,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-05 01:59:23,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:23,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:23,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-05 01:59:23,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-05 01:59:23,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:23,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:23,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:23,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:23,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:23,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:23,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:23,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:23,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:23,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:23,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:23,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-05 01:59:23,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-05 01:59:23,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:23,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-05 01:59:23,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-05 01:59:23,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:23,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-05 01:59:23,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-05 01:59:23,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-05 01:59:23,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:23,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-05 01:59:23,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-05 01:59:23,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-05 01:59:23,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-05 01:59:23,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-05 01:59:23,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:23,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-05 01:59:23,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:23,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:23,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:23,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:23,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:23,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:23,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:23,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:23,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-05 01:59:23,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-05 01:59:23,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:23,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-05 01:59:23,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:23,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:23,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:23,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:23,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:23,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:23,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-05 01:59:23,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:23,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:23,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:23,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:23,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:23,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-05 01:59:23,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,617] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-05 01:59:23,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:23,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:23,617] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,617] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:23,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:23,617] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:23,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:23,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-05 01:59:23,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,620] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:23,620] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:23,620] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:23,620] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,620] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,620] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,620] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-05 01:59:23,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-05 01:59:23,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-05 01:59:23,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:23,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:23,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:23,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:23,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:23,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:23,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:23,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:23,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:23,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-05 01:59:23,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-05 01:59:23,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:23,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:23,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:23,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:23,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:23,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-05 01:59:23,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:23,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-05 01:59:23,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-05 01:59:23,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-05 01:59:23,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-05 01:59:23,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-05 01:59:23,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:23,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:23,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-05 01:59:23,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:23,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-05 01:59:23,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:23,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:23,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:23,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:23,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:23,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:23,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:23,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,849] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:23,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-05 01:59:23,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:23,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:23,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,856] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:23,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:23,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-05 01:59:23,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:23,857] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-05 01:59:23,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:23,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-05 01:59:23,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-05 01:59:23,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:23,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:23,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:23,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:23,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:23,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-05 01:59:23,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-05 01:59:23,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:23,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:23,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:23,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:23,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-05 01:59:23,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:23,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:23,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:23,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:23,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:23,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:23,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:23,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:23,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:23,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:23,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:23,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:23,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:23,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:23,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:23,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:23,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:23,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:23,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:23,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:23,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:23,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:23,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:23,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:23,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:23,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:23,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:23,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:23,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:23,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:23,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-05 01:59:23,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:23,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:23,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:23,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:23,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:23,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:23,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,024] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-05 01:59:24,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-05 01:59:24,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-05 01:59:24,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-05 01:59:24,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: > overriding learning rate value to 0.0002 +1: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: > overriding minimum learning rate value to 2e-05 +3: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: > overriding warmup iterations value to 0 +5: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: > overriding total number of iterations value to 1 +0: > overriding decay style value to cosine +2: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-05 01:59:24,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-05 01:59:24,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-05 01:59:24,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-05 01:59:24,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:24,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:24,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:24,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:24,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:24,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:24,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:24,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:24,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:24,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-02-05 01:59:24,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:24,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:24,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:24,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:24,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:24,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:24,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:24,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:24,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:24,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:24,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-02-05 01:59:24,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:24,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:24,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:24,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:24,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:24,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:24,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:24,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-02-05 01:59:24,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:24,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:24,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:24,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:24,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:24,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:24,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:24,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-02-05 01:59:24,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +7: [2023-02-05 01:59:24,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:24,127] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 56 +7: [2023-02-05 01:59:24,128] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 56 +7: [2023-02-05 01:59:24,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:24,129] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 59 +7: [2023-02-05 01:59:24,131] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 59 +6: [2023-02-05 01:59:24,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-02-05 01:59:24,134] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 54 +6: [2023-02-05 01:59:24,136] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 54 +1: [2023-02-05 01:59:24,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:24,139] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 8 +1: [2023-02-05 01:59:24,141] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 8 +3: [2023-02-05 01:59:24,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:24,141] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 26 +5: [2023-02-05 01:59:24,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:24,141] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 40 +3: [2023-02-05 01:59:24,142] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 26 +7: [2023-02-05 01:59:24,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:24,143] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 57 +4: [2023-02-05 01:59:24,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:24,143] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 32 +5: [2023-02-05 01:59:24,143] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 40 +7: [2023-02-05 01:59:24,144] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 57 +4: [2023-02-05 01:59:24,145] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 32 +5: [2023-02-05 01:59:24,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:24,146] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 45 +5: [2023-02-05 01:59:24,147] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 45 +2: [2023-02-05 01:59:24,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:24,147] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 16 +2: [2023-02-05 01:59:24,149] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 16 +1: [2023-02-05 01:59:24,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:24,150] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 13 +2: [2023-02-05 01:59:24,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:24,152] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 20 +1: [2023-02-05 01:59:24,151] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 13 +1: [2023-02-05 01:59:24,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:24,152] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 9 +2: [2023-02-05 01:59:24,153] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 20 +1: [2023-02-05 01:59:24,154] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 9 +7: [2023-02-05 01:59:24,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:24,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:24,154] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 60 +7: [2023-02-05 01:59:24,155] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 63 +7: [2023-02-05 01:59:24,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:24,155] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 62 +7: [2023-02-05 01:59:24,156] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 60 +7: [2023-02-05 01:59:24,156] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 63 +7: [2023-02-05 01:59:24,157] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 62 +0: [2023-02-05 01:59:24,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:24,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:24,159] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 2 +4: [2023-02-05 01:59:24,159] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 34 +1: [2023-02-05 01:59:24,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:24,160] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 10 +4: [2023-02-05 01:59:24,160] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 34 +0: [2023-02-05 01:59:24,161] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 2 +1: [2023-02-05 01:59:24,161] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 10 +2: [2023-02-05 01:59:24,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:24,163] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 21 +3: [2023-02-05 01:59:24,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:24,164] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 24 +3: [2023-02-05 01:59:24,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:24,165] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 21 +6: [2023-02-05 01:59:24,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:24,165] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 25 +6: [2023-02-05 01:59:24,165] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 55 +7: [2023-02-05 01:59:24,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:24,165] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 58 +3: [2023-02-05 01:59:24,166] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 24 +7: [2023-02-05 01:59:24,166] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 58 +6: [2023-02-05 01:59:24,167] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 55 +3: [2023-02-05 01:59:24,167] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 25 +6: [2023-02-05 01:59:24,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-02-05 01:59:24,167] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 50 +0: [2023-02-05 01:59:24,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:24,167] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 0 +4: [2023-02-05 01:59:24,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:24,168] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 38 +6: [2023-02-05 01:59:24,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:24,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +6: [2023-02-05 01:59:24,168] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 53 +2: [2023-02-05 01:59:24,168] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 18 +6: [2023-02-05 01:59:24,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-02-05 01:59:24,168] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 50 +6: [2023-02-05 01:59:24,168] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 48 +3: [2023-02-05 01:59:24,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:24,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:24,169] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 29 +3: [2023-02-05 01:59:24,169] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 27 +0: [2023-02-05 01:59:24,169] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 0 +0: could not find arguments in the checkpoint ... +0: checkpoint version 3.0 +6: [2023-02-05 01:59:24,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-02-05 01:59:24,169] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 51 +2: [2023-02-05 01:59:24,169] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 18 +6: [2023-02-05 01:59:24,169] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 53 +6: [2023-02-05 01:59:24,170] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 48 +0: [2023-02-05 01:59:24,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:24,170] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 4 +3: [2023-02-05 01:59:24,170] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 29 +3: [2023-02-05 01:59:24,170] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 27 +4: [2023-02-05 01:59:24,170] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 38 +0: [2023-02-05 01:59:24,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:24,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:24,170] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 1 +3: [2023-02-05 01:59:24,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:24,170] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 33 +6: [2023-02-05 01:59:24,170] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 51 +3: [2023-02-05 01:59:24,170] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 28 +5: [2023-02-05 01:59:24,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:24,171] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 4 +5: [2023-02-05 01:59:24,171] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 43 +0: [2023-02-05 01:59:24,171] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 1 +5: [2023-02-05 01:59:24,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:24,172] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 47 +4: [2023-02-05 01:59:24,172] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 33 +3: [2023-02-05 01:59:24,172] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 28 +4: [2023-02-05 01:59:24,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:24,173] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 39 +5: [2023-02-05 01:59:24,173] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 43 +5: [2023-02-05 01:59:24,173] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 47 +4: [2023-02-05 01:59:24,174] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 39 +5: [2023-02-05 01:59:24,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:24,175] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 46 +2: [2023-02-05 01:59:24,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:24,175] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 19 +5: [2023-02-05 01:59:24,176] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 46 +2: [2023-02-05 01:59:24,176] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 19 +2: [2023-02-05 01:59:24,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:24,177] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 22 +2: [2023-02-05 01:59:24,178] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 22 +0: [2023-02-05 01:59:24,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:24,179] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 7 +0: [2023-02-05 01:59:24,180] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 7 +4: [2023-02-05 01:59:24,182] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:24,182] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 35 +2: [2023-02-05 01:59:24,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:24,184] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 35 +2: [2023-02-05 01:59:24,183] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 23 +2: [2023-02-05 01:59:24,185] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 23 +5: [2023-02-05 01:59:24,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:24,186] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 44 +1: [2023-02-05 01:59:24,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:24,187] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 14 +5: [2023-02-05 01:59:24,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:24,187] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 42 +5: [2023-02-05 01:59:24,187] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 44 +0: [2023-02-05 01:59:24,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-02-05 01:59:24,187] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 6 +5: [2023-02-05 01:59:24,188] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 42 +1: [2023-02-05 01:59:24,188] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 14 +0: [2023-02-05 01:59:24,189] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 6 +0: [2023-02-05 01:59:24,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:24,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:24,193] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 30 +0: [2023-02-05 01:59:24,193] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 3 +0: [2023-02-05 01:59:24,194] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 3 +3: [2023-02-05 01:59:24,194] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 30 +3: [2023-02-05 01:59:24,195] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-02-05 01:59:24,195] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 31 +4: [2023-02-05 01:59:24,196] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:24,197] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 37 +3: [2023-02-05 01:59:24,197] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 31 +4: [2023-02-05 01:59:24,198] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 37 +7: [2023-02-05 01:59:24,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:24,198] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 61 +1: [2023-02-05 01:59:24,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +7: [2023-02-05 01:59:24,199] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 61 +1: [2023-02-05 01:59:24,200] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 15 +1: [2023-02-05 01:59:24,201] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 15 +1: [2023-02-05 01:59:24,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:24,203] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 12 +0: [2023-02-05 01:59:24,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:24,205] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 12 +2: [2023-02-05 01:59:24,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2023-02-05 01:59:24,205] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 17 +0: [2023-02-05 01:59:24,205] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 5 +2: [2023-02-05 01:59:24,206] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 17 +0: [2023-02-05 01:59:24,206] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 5 +5: [2023-02-05 01:59:24,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-02-05 01:59:24,207] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 41 +5: [2023-02-05 01:59:24,208] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 41 +4: [2023-02-05 01:59:24,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-02-05 01:59:24,209] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 36 +4: [2023-02-05 01:59:24,210] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 36 +6: [2023-02-05 01:59:24,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-02-05 01:59:24,242] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 52 +6: [2023-02-05 01:59:24,243] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 52 +1: [2023-02-05 01:59:24,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-02-05 01:59:24,247] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 11 +1: [2023-02-05 01:59:24,249] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 11 +6: [2023-02-05 01:59:24,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-02-05 01:59:24,275] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 49 +6: [2023-02-05 01:59:24,276] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 49 +0: successfully loaded checkpoint from checkpoints_83m20b20b at iteration 0 +7: time (ms) | load-checkpoint: 1402.76 +0: estimated model parameters: 0.08274176 +0: estimated model parameters without embeddings: 0.04923648 +0: [after model, optimizer, and learning rate scheduler are built] datetime: 2023-02-05 01:59:25 +0: > building train, validation, and test datasets ... +0: > datasets target sizes (minimum size): +0: train: 1 +0: validation: 25600 +0: test: 25600 +0: > building train, validation, and test datasets for GPT ... +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.032301 seconds +0: number of documents: 41786294 +0: > dataset split: +0: train: +0: document indices in [0, 41786294) total of 41786294 documents +0: > WARNING: could not find index map files, building the indices on rank 0 ... +0: > only one epoch required, setting separate_last_epoch to False +0: > elasped time to build and save doc-idx mapping (seconds): 2.853608 +0: using: +0: number of documents: 41786294 +0: number of epochs: 1 +0: sequence length: 2048 +0: total number of samples: 9767462 +0: > elasped time to build and save sample-idx mapping (seconds): 0.478073 +0: > building shuffle index with split [0, 9767462) and [9767462, 9767462) ... +0: > elasped time to build and save shuffle-idx mapping (seconds): 0.272459 +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document_train_indexmap_1ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document_train_indexmap_1ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document_train_indexmap_1ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.352 seconds +0: total number of samples: 9767463 +0: total number of epochs: 1 +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.048317 seconds +0: number of documents: 364608 +0: > dataset split: +0: validation: +0: document indices in [0, 364608) total of 364608 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.068 seconds +0: total number of samples: 84978 +0: total number of epochs: 1 +0: > finished creating GPT datasets ... +0: [after dataloaders are built] datetime: 2023-02-05 01:59:44 +0: done with setup ... +0: training ... +7: time (ms) | model-and-optimizer-setup: 18132.06 | train/valid/test-data-iterators-setup: 18980.85 +0: [after training is done] datetime: 2023-02-05 01:59:44 +0: [2023-02-05 01:59:45,634] [INFO] [checkpointing.py:553:forward] Activation Checkpointing Information +0: [2023-02-05 01:59:45,634] [INFO] [checkpointing.py:554:forward] ----Partition Activations False, CPU CHECKPOINTING False +0: [2023-02-05 01:59:45,634] [INFO] [checkpointing.py:557:forward] ----contiguous Memory Checkpointing False with None total layers +0: [2023-02-05 01:59:45,634] [INFO] [checkpointing.py:560:forward] ----Synchronization False +0: [2023-02-05 01:59:45,634] [INFO] [checkpointing.py:561:forward] ----Profiling time in checkpointing False +7: ----------------------------------------------------------------------------------------------------------------- +7: validation loss at the end of training for val data | lm loss value: 3.608018E+00 | lm loss PPL: 3.689286E+01 | +7: ----------------------------------------------------------------------------------------------------------------- +END 2809865: Sun Feb 5 02:00:10 EET 2023 diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_0.json b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..650d78cfdb0259efb69a696751d30d39f3a02bc1 --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.2402533472331397, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.026185917923928877}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05635234796714399, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001189715463938905}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.24477723820256925, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003911408477735492}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08695988732670223, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016533104433194333}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.026609175713836197, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.000748423797004803}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.11897303489272501, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0027873552881508687}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.041180837536686735, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001064310413346249}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.05474668459996981, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001127997358624829}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.23999150640210568, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0038562042868130783}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08469132973494022, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015838300470463675}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.05402951821779291, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011140410965541283}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.23635512584590726, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003711184986500599}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08351466385050439, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015531618940798594}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_1.json b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..5cf326e757b24a0163603768eddbd63f1bc8a889 --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.1966511475591434, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.019961027192588972}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.052203096200997605, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0011814199935971487}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.23624534743367054, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004104260780661984}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.0803004872190622, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001586565623921053}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.022905553064196095, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006616729566122753}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10534165215136057, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0027047796932073874}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03561113397442626, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009654261821009468}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.05070197915896016, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011402093293298914}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2308796113126565, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004043545665546455}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07810279314923733, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015357417322684717}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04981348394373528, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001121609606106111}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.22542947047472042, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0038408108647505646}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07662533362763059, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014990302285155417}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_2.json b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..228db4d3225cff8ecfe8e50de03f49ef4a26ac6c --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.1884219412086828, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.023296081112241036}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05126483868300737, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0012289835089092538}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.22485099423647667, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003951756351109295}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07799553963261191, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016116099948632317}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.021910691837222048, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006674972683578638}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.09868793885112478, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00267972974482592}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.033996398337902196, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009807944540453622}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.049270188912098695, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001146672870138461}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21853634876925923, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003858236649166037}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07530843496272169, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015377151750192398}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04892666044601755, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011499291214700047}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.2158681859950873, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0037878816597245856}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07464177043046383, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015299665214804118}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_3.json b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..1d43cc519a926efbc9a90012e422b409732b0b18 --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.17480172226443813, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.020902481247421167}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05022658854957582, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0011483006429375308}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.21766011266118138, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0037980984559104767}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07671301972464661, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015827129539320492}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.021098768311532776, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006649377981455256}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.09361071943796677, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002581939249689689}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03267526954798376, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009673182645874134}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.048481590771377314, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010930663253747802}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21150576343847308, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003717086333074857}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07419860097105505, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015205147299916612}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04811636722846188, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00108771889241242}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.2091653043239388, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0036421804771378727}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07356401480470928, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015070391320228822}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_4.json b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..110058b342ba06d2d69e8a4b5d79664da6eed9ad --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.16911875641565555, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.021517687857935563}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05006343522986615, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0011889599388540328}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.21681139544905903, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003967685741672584}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07623947677441675, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015917618353225786}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.021014766190286683, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006570715082182744}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.09469441839577937, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002639071328542525}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.032460233176363706, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009488458515182864}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.04817824516534705, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011215777709926644}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21048516120467528, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0038835744921377115}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.0736245369022538, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001532311260104304}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.047823848011317574, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011179984956813653}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.20800373325037153, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0037694527974004455}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07293428533299331, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015070742072700732}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_5.json b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..fe999b770593c7089f3709d1b1de4baeb084b163 --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.1651739066359778, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.013582380328803895}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05002434980801082, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013304410605971587}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.2119514145396548, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003904652175945565}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07469945223979003, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001602258696177567}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.020755809373804224, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006578602827990767}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.09202921529905005, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0025807353374024083}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.031831586296822034, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009440380624300895}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.04832503657773521, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001264896440171536}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.20672314448320298, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0038314629173716295}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07248842677611263, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015461556121857058}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04803645077862967, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012672018364395272}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.20482117124374777, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0037387249354123263}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07186377941809288, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001522991746384243}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_0.json b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..8c324e9fa92bd99fe48801b4a7a48cde7a334b11 --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.08896264475290212, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014961453658771753}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.14766114503997804, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0022853382090377147}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.10235628004621973, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015352719576797514}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.010914932229793938, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.000442574617725981}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.02021566730440207, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009776391473893518}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.012908343025675926, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005189004993993455}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.07872520868819172, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012274871084527635}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.13301293352517277, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001988783940835733}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.09105207273135195, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012583390168369203}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.08310192751116388, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013861257331348126}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.1382205228298101, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0021125660441938714}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.09562117669012847, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014141072084287178}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.5931231989483277, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.031434461936734784}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_1.json b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..5ec814e1b56b1744474c03c81f90d37532ba80ce --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.10055265873683499, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014823849508849192}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.15721964279607673, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002101087218671987}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.11341944639978474, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014850357580665201}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.009009737162697751, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00040444338000367907}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.015105939700786192, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0007779189021910072}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.010373400181558124, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00046534552341471005}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.07670911618729974, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010177180705200342}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.12370483862247518, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0016084886833877196}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.08720424199215626, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0010144699788013974}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.09497668109572749, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013881457025051664}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.14924028152165453, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.001990774352437416}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.10723368799120181, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001385283510429042}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.5661195601128765, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02509943251177783}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_2.json b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..9a0b2d4aff6b94ea1a5d7215e417f232ce202a9b --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.09850769820039676, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001393866950191024}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.15728022214901077, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0021264405312242268}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.11178095357846812, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014024615137554158}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.008059435228204459, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00036724418176074656}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.014751654998446549, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0008627403125755772}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.00948402555732701, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00043499129797635845}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.07543298554631261, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0009710151708614845}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.12367943931212322, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0016565085405082212}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.08613436213947881, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.000973897059590453}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.09309972440355573, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013076759579022557}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.14900708831404136, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002010905961714977}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.10564492116920021, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00130869570379961}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.4817096842042167, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.032151798067684666}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_3.json b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..8797507371fcec1a019c50e9b5aed3c73cc89bc7 --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.08876189997465327, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016524443908080243}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.13798843229202282, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002362366437072347}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.09726354234545957, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015677513232408066}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.00898713301028085, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00044025120761783846}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.015915833954570136, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009534645333032286}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.010098316763211473, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0004715818643641205}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.06967104181936852, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012637053121464842}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.11083894321654772, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019133053692465861}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.07641663175251441, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0011479011013677293}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.0834028388857906, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015480707472114237}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.13022096210702444, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0022276479151398013}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.09143437561036061, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014579716026386462}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.6431613954247507, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04521270308701262}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_4.json b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..908b161ca45a0214ee23ad536acf3b89014fe353 --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.03231162663905748, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014029192086070055}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.04886975678455771, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0020406266432912625}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.03397615224146499, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0013413090911707363}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.004316882219286408, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00040303179947527026}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.007567975619407399, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00072019577146174}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.004657543429312446, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0003732961860283337}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.025705241800145585, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011151401955078694}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.039440132597804066, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0016572672525447662}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.026926105397227285, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0010332439990417993}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.030117595791150212, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013111889551577585}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.04548969274809294, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0018932906972936455}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.03157352959819198, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0012380063392149546}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.20685775778522772, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.022695897056587335}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_5.json b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..d71661d87d483e0779c0733be6b00814fd3d503b --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.004848063463391125, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.000536192278674931}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.007935073724408357, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0009002006058473917}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.00512854488886217, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.000538151396940047}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0005679553743524742, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00011257703068984332}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.001098140505376618, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00024257176231275518}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0006648666537942359, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00012786524814426142}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.003902270744914557, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.000423672666149028}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.006533208860473738, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0007584316058423692}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.00413219989090764, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00042547479499473683}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.0046299381082077675, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.000508666379442921}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.007589744194655164, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0008608249815724205}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.004904311688657879, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005139320810319882}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 7.998017663218444e-07, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 1.5582594593529686e-06}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_0.json b/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..6b08ebcf330ebc5871dab88c13673bf6eac167d3 --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 1.5942448530297823e-290, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 1.6666079809877698e-121}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.0003518518518518518, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0003335054684218052}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.0001858974358974359, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00010431359184166816}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.00012092550216120011, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 9.09013091480143e-05}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.0003333333333333333, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0003333333333333273}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 2.7777777777777776e-05, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 2.7777777777776627e-05}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 5.1282051282051286e-05, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 5.128205128205036e-05}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.0003518518518518518, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0003335054684218052}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.0001858974358974359, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00010431359184166816}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.00012092550216120011, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 9.09013091480143e-05}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.0003518518518518518, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0003335054684218052}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.0001858974358974359, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00010431359184166816}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.00012092550216120011, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 9.09013091480143e-05}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_1.json b/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..6d209acd63ca9b63f6225dfe7728806029844bcd --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 3.0108167948149323, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10792382018960417}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.4150584847267254, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00486094852985764}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.23018252026731417, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002512865906826812}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.25134930784146337, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022111067502175144}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.20083802263830847, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.005774843254349016}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.06311833340661611, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001259682053367179}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.07157056272296036, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012898762177629953}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3404916823152853, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.005065217097990015}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.17002953950443964, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001829503746638702}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.18851170439051954, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016248128513782103}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3655248997617407, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.005019655012266495}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.1900917930098727, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002103942567542897}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.20954840481881282, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018787543803962393}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_2.json b/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f91755b9c4a706686a278ca425fd7afc508b2480 --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 2.603793232159829, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08602086565258178}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.4779868239362067, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005553233194383071}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.2218408152039529, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025251357951490017}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.24621485176179148, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021298419718764295}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.28015507564420217, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.007006513823446057}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.06174287451325578, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0011946652104605105}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.07271898435902731, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011983442669434524}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.41118978628391467, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.005955799665988279}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.1669828348723676, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0018155749757754794}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.18939633658626337, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015242055814220488}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.43238831592921806, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.005826966992945048}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.18493553054709222, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0020976632350393387}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.20783964128412136, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017715975694931382}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_3.json b/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..8d7d2383949e4cebbfecd9f7deae65a00300bcec --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 2.3209857063675314, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12982076541178944}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5612866816665202, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.006129081036510009}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.20087508926536532, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025216399129215977}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.23358145663602803, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021197904747158307}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.3892125848686342, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.007925833706318395}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.061534987252114044, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0011957852304613182}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.07741264613129926, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012143950767558279}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.5012871219001243, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.00667094404075784}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.15375895092781094, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0017820390356310474}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.18430304636118938, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015018369060906252}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.5213850921639839, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.006496933622169016}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.16963147902413764, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002077342539496508}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.20086572934525307, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017513840380721597}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_4.json b/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 0000000000000000000000000000000000000000..14c5151fbaf181d8773beecd0b66a7abaa4a7b4e --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 2.2921990304850484, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08543251982170741}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5796907087867259, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.006171454753728164}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.20016357064955265, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002555704270475239}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.23318933610493578, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020775767402760356}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.41295355940107836, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.008022679828508067}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.06284240129070301, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0012225472768686867}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.0793602748827254, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012046225439158768}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.5211414591118526, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.006749185053993893}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.15341379385275639, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0017924618345547736}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.18494319337588916, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014693982609005228}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.5412722798442585, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.006559752101552006}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.16952730489426254, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0020957398904803224}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.20153553784002368, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001716474593973442}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_5.json b/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..76c5d036db03a5bb385672f861e2269f5f52e0ce --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 1.925086755476535, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07460606701647725}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5973757904695731, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.006271640540558927}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.1923538694713606, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0024817861959041685}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.2280447164700859, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020473595981112917}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.4376950007402379, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.008112624696462126}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.06086131939525761, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0011484366650911057}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.07923731473143852, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011758747556224521}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.541673436116618, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0068486676204078355}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.14900691337572508, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0017633515667638841}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.18280675281871867, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014662677675956494}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.5586648028846495, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.006679295650040034}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.1623031921397378, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0020094211700576207}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.1967172003563238, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001676113963181455}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_0.json b/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..d9e6985d23285ac22d1e8fab84e6500972cc9a43 --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.0932531173603635, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015120250012112578}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.22658547801108042, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0034238415570615605}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.1301076426530973, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002005800630789775}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.014274889687797594, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006537571125742924}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.03619983127433739, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016732561206435187}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.020193635492138703, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009172009114471265}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.08248266489307994, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012123787530455208}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.20225938298416002, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0028877983346722353}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.11541860519452854, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001626381586865247}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.07120007313928639, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011581187440043383}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.17499666394655364, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027595700656802896}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.09965479142654927, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015562816394015851}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.7108334790337705, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.058162173996696875}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_1.json b/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..c01603267f8b25f9ae5162648a42d5a93c71dec7 --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08681874008608821, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015372287874623328}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.21531348981463982, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003576186419830758}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.12218534954690298, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002086105792744862}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.012509124905331995, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007420823011256812}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.03157721578215402, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001783836316836288}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.01770150669759179, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010270994950922852}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.07688577272866007, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012789662686650827}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.19164427424805588, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0030058089964809342}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.10834326623147619, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001734887719558359}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06706908022443173, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012034239269148562}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.16838691034513611, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002901367228885086}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0946683523065788, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016444941052935398}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.738240889840718, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0868248085435632}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_2.json b/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f965cfc60ca1035efd270320164b4474ce3af901 --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08344848795431818, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014501263751190221}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.20826530640557234, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0034504658033345666}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.1176287038533819, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001972378143041748}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.011371076245523942, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006862950154624032}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.029673978711314167, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017700340282045873}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.016220771292250878, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009663493756094667}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.07441186933301655, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012159583112653365}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.18674988097913103, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0029513766424467984}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.10506213754165063, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016611947382865833}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06339905265698148, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001092340918395308}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.16003098813771494, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027073758950265613}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.08963347056560689, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015009503965992478}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.6710588893303108, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10334104691062508}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_3.json b/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..b5edc41a1df1b77b4c24e41ef4e34affa8d63bff --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08566757832306267, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016587474807451944}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.20357502479268652, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003598302895199109}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.11753538804203087, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020836683386094837}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.012532666305751455, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006509268191738087}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.03174654587249015, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001651560460066696}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.01758783301448063, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008993414378996701}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.07688005894422863, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001413211781346706}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.1833705494755132, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003075758477780908}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.10552480046986407, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017553752294999442}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06597367200611912, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012975581502598706}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.15757361471543319, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0028139783662738914}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.09039597582466691, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015821613854120013}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.6735453686380013, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07011141492068476}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_4.json b/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..ce7dde2e47fa8e45502841eb74f5ae0d2e0230b3 --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.03132332576976698, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0023004915067675346}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.053496538783602984, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003144438540129738}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.03459946242562895, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020414383200811565}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.004519290679830224, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005984203253932537}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.008760932443251665, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009658740352901951}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.005457903769757144, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0006055871477864605}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.026475030219787705, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0018704914251495015}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.04652141598674097, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002719893393898828}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.02968085318772741, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017092195309002798}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.024457443251700062, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018438226051541928}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.0414183937683806, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002446421641025846}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.026736187678505472, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015717317320738274}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.3141890910742015, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05421566585284457}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_5.json b/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..a1628051c4143aa3ded9d394aac340b9c4035935 --- /dev/null +++ b/83m20b20b/evaluation/generation/agg.83m20b20b_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.0033087355492285686, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0009040554845808398}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.0027236205204102274, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0007255225241620119}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.0029510521900948873, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0007941713282393438}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0004761124003632328, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00024265317442198996}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.00043140614764249324, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0002265423309937391}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.00044926348899158137, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00023227082406786792}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.002457213814477674, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.000670321594996775}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0020484761065232117, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0005445612655945507}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.0022017230619442475, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0005901195656983681}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.002507662810542652, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.000690750451785873}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.00209892510258819, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0005695502767532305}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0022521720580092257, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0006132441399757633}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.7319051598815575e-38, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 2.403355708154714e-33}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_0.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f613605d74ffca1a06bd5cfa6c290c25eeefed7e --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fa946ab6364576b3635803718a29e0aa7ae2aa84cc315b7ea6e7d7fc449fc7d +size 4155063 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_1.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c921901f7a81109fe1bb8d65b34508f6e36a2e6f --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0026d8454ae06f20a2a095b2c6cf3ce637009889041c222f54758e99f6e7a8f2 +size 5104695 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_2.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..79ac0549882ea7b69c0ac9755def7635bc5da643 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab8cf4ab4100c58defe52d20bebbbad4da0d033cb5d458108db8335259610b77 +size 5973736 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_3.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..af28d7cc7712853c9c10b10e85b3f117421282fa --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1c635f9498a520431b3d2580a0a177107117c914cd7217fdf88f12108bc44ac +size 6845433 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_4.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bc8353f140d8d1916e570f0f7a6af67d4971da70 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f7f842ad2747995ee43e78e86a7aaf8734f131c9fd97398ba070d29baac3f00 +size 7729025 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_5.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ca65a380bf1758a1973239134d22991d04187b88 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-web_nlg_en_PALM_prompt_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b9d738ae4cdbf49142be6b5e49736445fb7d69145e914de7ae9bbd98ff86a78 +size 8616397 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_0.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a562fde300e2d7c21ffa5c4e40953226bc380cc2 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06a4dc8de54648ca35120b458547dd314ae7fe91328f9dbc5f04eea70c1c6ed8 +size 7660419 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_1.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..de85d51778ce21558ab4fa2fdc3580004b0cd6f8 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be710c31389cbe08c474a99cbd5cb0ef1561af54446bcc9f577ad8c2d62e903e +size 13339002 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_2.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9ae1d3a18d78e2b8fc71e351953588377ccae584 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c584b04555f5ef7bc3e9f62f7e4ad2e9506343811cf1a6d7e876f0051a817a7 +size 18945077 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_3.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..479cb313e21016f88e7edafa9e369ac581fe0c38 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc097ad5f2b1f7111b7074f0f8a36055e9e2d65c2ff2a3ded7b874f5b12e238 +size 24352124 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_4.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4d58cd8b2f82c1501b6ac43514fc3033db2fb7aa --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0742cafd56b150206875d395e54ac7253965d1be4f0422ad2c50e6d6168ec6a3 +size 29471580 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_5.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ebb4d7a55d3ac4f34681aa1e48e0d99a373524b4 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_GEM-wiki_lingua_en_tldr_en_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abaab9b01cc4371db5d979c1764cf67699b721f92e9006282319d875321e7009 +size 34799255 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3d187cb22ee28fbad0b960a2228f076f9a19a322 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3010a4215dd730bf4d0fd5dc12ad08a9be6a75c1430a2d1db7c270bb90c51229 +size 3620971 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..98e7e7d597858b92c5037851e380670d977662dd --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09957413eb5bb3fdc66758a34cffdba60e5dd3d5634bfd0f30db7cde1554fd9d +size 4978254 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1d736b9c0c0a60b2506aaa535279ff528b829584 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:251b506e5e3ab668d4a4f08bb34836907b1aaa19cfa51ac5465f4ffedc6ba4f1 +size 6045786 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e9cf887fb5ed0f58c8ed00c1b7524b67a948f597 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e981c55f777a58a0f79b24371df115470219a19c530ee9ca87ff3a794687dcf +size 7087395 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d688c33912a4cf6ee3a41497972f561cc72fd910 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df3245341dec3bb3a3a9eb6823056ffc9cb27a5738e882426e85c9b92732a861 +size 8162698 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ef36ab36f5bae4595ac8ca9bc43793b9ed820fd4 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7531324629c563fe471b09ee39b1b0cdbe34444aa0820cb8663dc4b47a6558ee +size 9238035 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_0.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6443ada660de12449810d42849bd694cc306f691 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba202fdcf4831b0cfbdbfad93bf741a46776a48db308250bbd2e0a5ab316ab12 +size 2839494 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_1.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9bad95c3768fdae409bea7f660850975b649fb88 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deb1dc51d46effc41d7b7e7b2f634ce64b7e78113ffdf6f0b225586285ec2f5f +size 5109277 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_2.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..03c3e137597655f51ab3dd332cc998d6246d610b --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:905d3afd7a9654634269d643ab2dcec20f7db43b27d51565f73119246f525180 +size 7384849 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_3.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f9a39f0ebb4831218f598c8e256d5941a10671b3 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d8dfdb35b1fc924f9da2753ff546337098a7c2c6f729dafc8b1d83b1dc81d49 +size 9652272 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_4.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f20cc1fbe07dc44a6551999e2ff8dd3baa9d649e --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:456a6339f328cc4244f9a8071957f5bbf7321ca362c43ef67f69c09206136cf8 +size 11675010 diff --git a/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_5.jsonl b/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..77ee168b4a53a544e64bbdbf897c7bb96f1c5fb2 --- /dev/null +++ b/83m20b20b/evaluation/generation/examples.83m20b20b_gem_xsum_article_DOC_summary_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c8be2d8d9ea7cda808c7f55869c4d022ca8b5b153322762f13c21ee75ac542d +size 13897568 diff --git a/83m20b20b/evaluation/generation/merged.csv b/83m20b20b/evaluation/generation/merged.csv new file mode 100644 index 0000000000000000000000000000000000000000..251748024f2215748e39d709ab89f29d12be09ad --- /dev/null +++ b/83m20b20b/evaluation/generation/merged.csv @@ -0,0 +1,53 @@ +dataset,fewshots,prompt,metric,value +e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,5.1282051282051286e-05 +e2e_nlg_cleaned,0,median,rouge2_fmeasure,5.1282051282051286e-05 +e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.07157056272296036 +e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.07157056272296036 +e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.07271898435902731 +e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.07271898435902731 +e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.07741264613129926 +e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.07741264613129926 +e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.0793602748827254 +e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.0793602748827254 +e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.07923731473143852 +e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.07923731473143852 +e2e_nlg_cleaned,5,average,multiple,0.06339184414645549 +gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.020193635492138703 +gem_xsum,0,median,rouge2_fmeasure,0.020193635492138703 +gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.01770150669759179 +gem_xsum,1,median,rouge2_fmeasure,0.01770150669759179 +gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.016220771292250878 +gem_xsum,2,median,rouge2_fmeasure,0.016220771292250878 +gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.01758783301448063 +gem_xsum,3,median,rouge2_fmeasure,0.01758783301448063 +gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.005457903769757144 +gem_xsum,4,median,rouge2_fmeasure,0.005457903769757144 +gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.00044926348899158137 +gem_xsum,5,median,rouge2_fmeasure,0.00044926348899158137 +gem_xsum,5,average,multiple,0.012935152292535121 +web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.041180837536686735 +web_nlg_en,0,median,rouge2_fmeasure,0.041180837536686735 +web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.03561113397442626 +web_nlg_en,1,median,rouge2_fmeasure,0.03561113397442626 +web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.033996398337902196 +web_nlg_en,2,median,rouge2_fmeasure,0.033996398337902196 +web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.03267526954798376 +web_nlg_en,3,median,rouge2_fmeasure,0.03267526954798376 +web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.032460233176363706 +web_nlg_en,4,median,rouge2_fmeasure,0.032460233176363706 +web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.031831586296822034 +web_nlg_en,5,median,rouge2_fmeasure,0.031831586296822034 +web_nlg_en,5,average,multiple,0.03462590981169745 +wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.012908343025675926 +wiki_lingua_en,0,median,rouge2_fmeasure,0.012908343025675926 +wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.010373400181558124 +wiki_lingua_en,1,median,rouge2_fmeasure,0.010373400181558124 +wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.00948402555732701 +wiki_lingua_en,2,median,rouge2_fmeasure,0.00948402555732701 +wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.010098316763211473 +wiki_lingua_en,3,median,rouge2_fmeasure,0.010098316763211473 +wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.004657543429312446 +wiki_lingua_en,4,median,rouge2_fmeasure,0.004657543429312446 +wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.0006648666537942359 +wiki_lingua_en,5,median,rouge2_fmeasure,0.0006648666537942359 +wiki_lingua_en,5,average,multiple,0.008031082601813202 diff --git a/83m20b20b/evaluation/generation/merged.json b/83m20b20b/evaluation/generation/merged.json new file mode 100644 index 0000000000000000000000000000000000000000..6b37b666a986bd43c2c94496596d4120383a3b36 --- /dev/null +++ b/83m20b20b/evaluation/generation/merged.json @@ -0,0 +1 @@ +{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.2402533472331397, "bleu_stderr": 0.026185917923928877, "rouge1_fmeasure": 0.08695988732670223, "rouge1_fmeasure_stderr": 0.0016533104433194333, "rouge1_precision": 0.05635234796714399, "rouge1_precision_stderr": 0.001189715463938905, "rouge1_recall": 0.24477723820256925, "rouge1_recall_stderr": 0.003911408477735492, "rouge2_fmeasure": 0.041180837536686735, "rouge2_fmeasure_stderr": 0.001064310413346249, "rouge2_precision": 0.026609175713836197, "rouge2_precision_stderr": 0.000748423797004803, "rouge2_recall": 0.11897303489272501, "rouge2_recall_stderr": 0.0027873552881508687, "rougeL_fmeasure": 0.08469132973494022, "rougeL_fmeasure_stderr": 0.0015838300470463675, "rougeL_precision": 0.05474668459996981, "rougeL_precision_stderr": 0.001127997358624829, "rougeL_recall": 0.23999150640210568, "rougeL_recall_stderr": 0.0038562042868130783, "rougeLsum_fmeasure": 0.08351466385050439, "rougeLsum_fmeasure_stderr": 0.0015531618940798594, "rougeLsum_precision": 0.05402951821779291, "rougeLsum_precision_stderr": 0.0011140410965541283, "rougeLsum_recall": 0.23635512584590726, "rougeLsum_recall_stderr": 0.003711184986500599}}, "1": {"PALM_prompt": {"bleu": 0.1966511475591434, "bleu_stderr": 0.019961027192588972, "rouge1_fmeasure": 0.0803004872190622, "rouge1_fmeasure_stderr": 0.001586565623921053, "rouge1_precision": 0.052203096200997605, "rouge1_precision_stderr": 0.0011814199935971487, "rouge1_recall": 0.23624534743367054, "rouge1_recall_stderr": 0.004104260780661984, "rouge2_fmeasure": 0.03561113397442626, "rouge2_fmeasure_stderr": 0.0009654261821009468, "rouge2_precision": 0.022905553064196095, "rouge2_precision_stderr": 0.0006616729566122753, "rouge2_recall": 0.10534165215136057, "rouge2_recall_stderr": 0.0027047796932073874, "rougeL_fmeasure": 0.07810279314923733, "rougeL_fmeasure_stderr": 0.0015357417322684717, "rougeL_precision": 0.05070197915896016, "rougeL_precision_stderr": 0.0011402093293298914, "rougeL_recall": 0.2308796113126565, "rougeL_recall_stderr": 0.004043545665546455, "rougeLsum_fmeasure": 0.07662533362763059, "rougeLsum_fmeasure_stderr": 0.0014990302285155417, "rougeLsum_precision": 0.04981348394373528, "rougeLsum_precision_stderr": 0.001121609606106111, "rougeLsum_recall": 0.22542947047472042, "rougeLsum_recall_stderr": 0.0038408108647505646}}, "2": {"PALM_prompt": {"bleu": 0.1884219412086828, "bleu_stderr": 0.023296081112241036, "rouge1_fmeasure": 0.07799553963261191, "rouge1_fmeasure_stderr": 0.0016116099948632317, "rouge1_precision": 0.05126483868300737, "rouge1_precision_stderr": 0.0012289835089092538, "rouge1_recall": 0.22485099423647667, "rouge1_recall_stderr": 0.003951756351109295, "rouge2_fmeasure": 0.033996398337902196, "rouge2_fmeasure_stderr": 0.0009807944540453622, "rouge2_precision": 0.021910691837222048, "rouge2_precision_stderr": 0.0006674972683578638, "rouge2_recall": 0.09868793885112478, "rouge2_recall_stderr": 0.00267972974482592, "rougeL_fmeasure": 0.07530843496272169, "rougeL_fmeasure_stderr": 0.0015377151750192398, "rougeL_precision": 0.049270188912098695, "rougeL_precision_stderr": 0.001146672870138461, "rougeL_recall": 0.21853634876925923, "rougeL_recall_stderr": 0.003858236649166037, "rougeLsum_fmeasure": 0.07464177043046383, "rougeLsum_fmeasure_stderr": 0.0015299665214804118, "rougeLsum_precision": 0.04892666044601755, "rougeLsum_precision_stderr": 0.0011499291214700047, "rougeLsum_recall": 0.2158681859950873, "rougeLsum_recall_stderr": 0.0037878816597245856}}, "3": {"PALM_prompt": {"bleu": 0.17480172226443813, "bleu_stderr": 0.020902481247421167, "rouge1_fmeasure": 0.07671301972464661, "rouge1_fmeasure_stderr": 0.0015827129539320492, "rouge1_precision": 0.05022658854957582, "rouge1_precision_stderr": 0.0011483006429375308, "rouge1_recall": 0.21766011266118138, "rouge1_recall_stderr": 0.0037980984559104767, "rouge2_fmeasure": 0.03267526954798376, "rouge2_fmeasure_stderr": 0.0009673182645874134, "rouge2_precision": 0.021098768311532776, "rouge2_precision_stderr": 0.0006649377981455256, "rouge2_recall": 0.09361071943796677, "rouge2_recall_stderr": 0.002581939249689689, "rougeL_fmeasure": 0.07419860097105505, "rougeL_fmeasure_stderr": 0.0015205147299916612, "rougeL_precision": 0.048481590771377314, "rougeL_precision_stderr": 0.0010930663253747802, "rougeL_recall": 0.21150576343847308, "rougeL_recall_stderr": 0.003717086333074857, "rougeLsum_fmeasure": 0.07356401480470928, "rougeLsum_fmeasure_stderr": 0.0015070391320228822, "rougeLsum_precision": 0.04811636722846188, "rougeLsum_precision_stderr": 0.00108771889241242, "rougeLsum_recall": 0.2091653043239388, "rougeLsum_recall_stderr": 0.0036421804771378727}}, "4": {"PALM_prompt": {"bleu": 0.16911875641565555, "bleu_stderr": 0.021517687857935563, "rouge1_fmeasure": 0.07623947677441675, "rouge1_fmeasure_stderr": 0.0015917618353225786, "rouge1_precision": 0.05006343522986615, "rouge1_precision_stderr": 0.0011889599388540328, "rouge1_recall": 0.21681139544905903, "rouge1_recall_stderr": 0.003967685741672584, "rouge2_fmeasure": 0.032460233176363706, "rouge2_fmeasure_stderr": 0.0009488458515182864, "rouge2_precision": 0.021014766190286683, "rouge2_precision_stderr": 0.0006570715082182744, "rouge2_recall": 0.09469441839577937, "rouge2_recall_stderr": 0.002639071328542525, "rougeL_fmeasure": 0.0736245369022538, "rougeL_fmeasure_stderr": 0.001532311260104304, "rougeL_precision": 0.04817824516534705, "rougeL_precision_stderr": 0.0011215777709926644, "rougeL_recall": 0.21048516120467528, "rougeL_recall_stderr": 0.0038835744921377115, "rougeLsum_fmeasure": 0.07293428533299331, "rougeLsum_fmeasure_stderr": 0.0015070742072700732, "rougeLsum_precision": 0.047823848011317574, "rougeLsum_precision_stderr": 0.0011179984956813653, "rougeLsum_recall": 0.20800373325037153, "rougeLsum_recall_stderr": 0.0037694527974004455}}, "5": {"PALM_prompt": {"bleu": 0.1651739066359778, "bleu_stderr": 0.013582380328803895, "rouge1_fmeasure": 0.07469945223979003, "rouge1_fmeasure_stderr": 0.001602258696177567, "rouge1_precision": 0.05002434980801082, "rouge1_precision_stderr": 0.0013304410605971587, "rouge1_recall": 0.2119514145396548, "rouge1_recall_stderr": 0.003904652175945565, "rouge2_fmeasure": 0.031831586296822034, "rouge2_fmeasure_stderr": 0.0009440380624300895, "rouge2_precision": 0.020755809373804224, "rouge2_precision_stderr": 0.0006578602827990767, "rouge2_recall": 0.09202921529905005, "rouge2_recall_stderr": 0.0025807353374024083, "rougeL_fmeasure": 0.07248842677611263, "rougeL_fmeasure_stderr": 0.0015461556121857058, "rougeL_precision": 0.04832503657773521, "rougeL_precision_stderr": 0.001264896440171536, "rougeL_recall": 0.20672314448320298, "rougeL_recall_stderr": 0.0038314629173716295, "rougeLsum_fmeasure": 0.07186377941809288, "rougeLsum_fmeasure_stderr": 0.001522991746384243, "rougeLsum_precision": 0.04803645077862967, "rougeLsum_precision_stderr": 0.0012672018364395272, "rougeLsum_recall": 0.20482117124374777, "rougeLsum_recall_stderr": 0.0037387249354123263}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 0.5931231989483277, "bleu_stderr": 0.031434461936734784, "rouge1_fmeasure": 0.10235628004621973, "rouge1_fmeasure_stderr": 0.0015352719576797514, "rouge1_precision": 0.08896264475290212, "rouge1_precision_stderr": 0.0014961453658771753, "rouge1_recall": 0.14766114503997804, "rouge1_recall_stderr": 0.0022853382090377147, "rouge2_fmeasure": 0.012908343025675926, "rouge2_fmeasure_stderr": 0.0005189004993993455, "rouge2_precision": 0.010914932229793938, "rouge2_precision_stderr": 0.000442574617725981, "rouge2_recall": 0.02021566730440207, "rouge2_recall_stderr": 0.0009776391473893518, "rougeL_fmeasure": 0.09105207273135195, "rougeL_fmeasure_stderr": 0.0012583390168369203, "rougeL_precision": 0.07872520868819172, "rougeL_precision_stderr": 0.0012274871084527635, "rougeL_recall": 0.13301293352517277, "rougeL_recall_stderr": 0.001988783940835733, "rougeLsum_fmeasure": 0.09562117669012847, "rougeLsum_fmeasure_stderr": 0.0014141072084287178, "rougeLsum_precision": 0.08310192751116388, "rougeLsum_precision_stderr": 0.0013861257331348126, "rougeLsum_recall": 0.1382205228298101, "rougeLsum_recall_stderr": 0.0021125660441938714}}, "1": {"tldr_en": {"bleu": 0.5661195601128765, "bleu_stderr": 0.02509943251177783, "rouge1_fmeasure": 0.11341944639978474, "rouge1_fmeasure_stderr": 0.0014850357580665201, "rouge1_precision": 0.10055265873683499, "rouge1_precision_stderr": 0.0014823849508849192, "rouge1_recall": 0.15721964279607673, "rouge1_recall_stderr": 0.002101087218671987, "rouge2_fmeasure": 0.010373400181558124, "rouge2_fmeasure_stderr": 0.00046534552341471005, "rouge2_precision": 0.009009737162697751, "rouge2_precision_stderr": 0.00040444338000367907, "rouge2_recall": 0.015105939700786192, "rouge2_recall_stderr": 0.0007779189021910072, "rougeL_fmeasure": 0.08720424199215626, "rougeL_fmeasure_stderr": 0.0010144699788013974, "rougeL_precision": 0.07670911618729974, "rougeL_precision_stderr": 0.0010177180705200342, "rougeL_recall": 0.12370483862247518, "rougeL_recall_stderr": 0.0016084886833877196, "rougeLsum_fmeasure": 0.10723368799120181, "rougeLsum_fmeasure_stderr": 0.001385283510429042, "rougeLsum_precision": 0.09497668109572749, "rougeLsum_precision_stderr": 0.0013881457025051664, "rougeLsum_recall": 0.14924028152165453, "rougeLsum_recall_stderr": 0.001990774352437416}}, "2": {"tldr_en": {"bleu": 0.4817096842042167, "bleu_stderr": 0.032151798067684666, "rouge1_fmeasure": 0.11178095357846812, "rouge1_fmeasure_stderr": 0.0014024615137554158, "rouge1_precision": 0.09850769820039676, "rouge1_precision_stderr": 0.001393866950191024, "rouge1_recall": 0.15728022214901077, "rouge1_recall_stderr": 0.0021264405312242268, "rouge2_fmeasure": 0.00948402555732701, "rouge2_fmeasure_stderr": 0.00043499129797635845, "rouge2_precision": 0.008059435228204459, "rouge2_precision_stderr": 0.00036724418176074656, "rouge2_recall": 0.014751654998446549, "rouge2_recall_stderr": 0.0008627403125755772, "rougeL_fmeasure": 0.08613436213947881, "rougeL_fmeasure_stderr": 0.000973897059590453, "rougeL_precision": 0.07543298554631261, "rougeL_precision_stderr": 0.0009710151708614845, "rougeL_recall": 0.12367943931212322, "rougeL_recall_stderr": 0.0016565085405082212, "rougeLsum_fmeasure": 0.10564492116920021, "rougeLsum_fmeasure_stderr": 0.00130869570379961, "rougeLsum_precision": 0.09309972440355573, "rougeLsum_precision_stderr": 0.0013076759579022557, "rougeLsum_recall": 0.14900708831404136, "rougeLsum_recall_stderr": 0.002010905961714977}}, "3": {"tldr_en": {"bleu": 0.6431613954247507, "bleu_stderr": 0.04521270308701262, "rouge1_fmeasure": 0.09726354234545957, "rouge1_fmeasure_stderr": 0.0015677513232408066, "rouge1_precision": 0.08876189997465327, "rouge1_precision_stderr": 0.0016524443908080243, "rouge1_recall": 0.13798843229202282, "rouge1_recall_stderr": 0.002362366437072347, "rouge2_fmeasure": 0.010098316763211473, "rouge2_fmeasure_stderr": 0.0004715818643641205, "rouge2_precision": 0.00898713301028085, "rouge2_precision_stderr": 0.00044025120761783846, "rouge2_recall": 0.015915833954570136, "rouge2_recall_stderr": 0.0009534645333032286, "rougeL_fmeasure": 0.07641663175251441, "rougeL_fmeasure_stderr": 0.0011479011013677293, "rougeL_precision": 0.06967104181936852, "rougeL_precision_stderr": 0.0012637053121464842, "rougeL_recall": 0.11083894321654772, "rougeL_recall_stderr": 0.0019133053692465861, "rougeLsum_fmeasure": 0.09143437561036061, "rougeLsum_fmeasure_stderr": 0.0014579716026386462, "rougeLsum_precision": 0.0834028388857906, "rougeLsum_precision_stderr": 0.0015480707472114237, "rougeLsum_recall": 0.13022096210702444, "rougeLsum_recall_stderr": 0.0022276479151398013}}, "4": {"tldr_en": {"bleu": 0.20685775778522772, "bleu_stderr": 0.022695897056587335, "rouge1_fmeasure": 0.03397615224146499, "rouge1_fmeasure_stderr": 0.0013413090911707363, "rouge1_precision": 0.03231162663905748, "rouge1_precision_stderr": 0.0014029192086070055, "rouge1_recall": 0.04886975678455771, "rouge1_recall_stderr": 0.0020406266432912625, "rouge2_fmeasure": 0.004657543429312446, "rouge2_fmeasure_stderr": 0.0003732961860283337, "rouge2_precision": 0.004316882219286408, "rouge2_precision_stderr": 0.00040303179947527026, "rouge2_recall": 0.007567975619407399, "rouge2_recall_stderr": 0.00072019577146174, "rougeL_fmeasure": 0.026926105397227285, "rougeL_fmeasure_stderr": 0.0010332439990417993, "rougeL_precision": 0.025705241800145585, "rougeL_precision_stderr": 0.0011151401955078694, "rougeL_recall": 0.039440132597804066, "rougeL_recall_stderr": 0.0016572672525447662, "rougeLsum_fmeasure": 0.03157352959819198, "rougeLsum_fmeasure_stderr": 0.0012380063392149546, "rougeLsum_precision": 0.030117595791150212, "rougeLsum_precision_stderr": 0.0013111889551577585, "rougeLsum_recall": 0.04548969274809294, "rougeLsum_recall_stderr": 0.0018932906972936455}}, "5": {"tldr_en": {"bleu": 7.998017663218444e-07, "bleu_stderr": 1.5582594593529686e-06, "rouge1_fmeasure": 0.00512854488886217, "rouge1_fmeasure_stderr": 0.000538151396940047, "rouge1_precision": 0.004848063463391125, "rouge1_precision_stderr": 0.000536192278674931, "rouge1_recall": 0.007935073724408357, "rouge1_recall_stderr": 0.0009002006058473917, "rouge2_fmeasure": 0.0006648666537942359, "rouge2_fmeasure_stderr": 0.00012786524814426142, "rouge2_precision": 0.0005679553743524742, "rouge2_precision_stderr": 0.00011257703068984332, "rouge2_recall": 0.001098140505376618, "rouge2_recall_stderr": 0.00024257176231275518, "rougeL_fmeasure": 0.00413219989090764, "rougeL_fmeasure_stderr": 0.00042547479499473683, "rougeL_precision": 0.003902270744914557, "rougeL_precision_stderr": 0.000423672666149028, "rougeL_recall": 0.006533208860473738, "rougeL_recall_stderr": 0.0007584316058423692, "rougeLsum_fmeasure": 0.004904311688657879, "rougeLsum_fmeasure_stderr": 0.0005139320810319882, "rougeLsum_precision": 0.0046299381082077675, "rougeLsum_precision_stderr": 0.000508666379442921, "rougeLsum_recall": 0.007589744194655164, "rougeLsum_recall_stderr": 0.0008608249815724205}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 1.5942448530297823e-290, "bleu_stderr": 1.6666079809877698e-121, "rouge1_fmeasure": 0.00012092550216120011, "rouge1_fmeasure_stderr": 9.09013091480143e-05, "rouge1_precision": 0.0003518518518518518, "rouge1_precision_stderr": 0.0003335054684218052, "rouge1_recall": 0.0001858974358974359, "rouge1_recall_stderr": 0.00010431359184166816, "rouge2_fmeasure": 5.1282051282051286e-05, "rouge2_fmeasure_stderr": 5.128205128205036e-05, "rouge2_precision": 0.0003333333333333333, "rouge2_precision_stderr": 0.0003333333333333273, "rouge2_recall": 2.7777777777777776e-05, "rouge2_recall_stderr": 2.7777777777776627e-05, "rougeL_fmeasure": 0.00012092550216120011, "rougeL_fmeasure_stderr": 9.09013091480143e-05, "rougeL_precision": 0.0003518518518518518, "rougeL_precision_stderr": 0.0003335054684218052, "rougeL_recall": 0.0001858974358974359, "rougeL_recall_stderr": 0.00010431359184166816, "rougeLsum_fmeasure": 0.00012092550216120011, "rougeLsum_fmeasure_stderr": 9.09013091480143e-05, "rougeLsum_precision": 0.0003518518518518518, "rougeLsum_precision_stderr": 0.0003335054684218052, "rougeLsum_recall": 0.0001858974358974359, "rougeLsum_recall_stderr": 0.00010431359184166816}}, "1": {"generate_text_restaurant": {"bleu": 3.0108167948149323, "bleu_stderr": 0.10792382018960417, "rouge1_fmeasure": 0.25134930784146337, "rouge1_fmeasure_stderr": 0.0022111067502175144, "rouge1_precision": 0.4150584847267254, "rouge1_precision_stderr": 0.00486094852985764, "rouge1_recall": 0.23018252026731417, "rouge1_recall_stderr": 0.002512865906826812, "rouge2_fmeasure": 0.07157056272296036, "rouge2_fmeasure_stderr": 0.0012898762177629953, "rouge2_precision": 0.20083802263830847, "rouge2_precision_stderr": 0.005774843254349016, "rouge2_recall": 0.06311833340661611, "rouge2_recall_stderr": 0.001259682053367179, "rougeL_fmeasure": 0.18851170439051954, "rougeL_fmeasure_stderr": 0.0016248128513782103, "rougeL_precision": 0.3404916823152853, "rougeL_precision_stderr": 0.005065217097990015, "rougeL_recall": 0.17002953950443964, "rougeL_recall_stderr": 0.001829503746638702, "rougeLsum_fmeasure": 0.20954840481881282, "rougeLsum_fmeasure_stderr": 0.0018787543803962393, "rougeLsum_precision": 0.3655248997617407, "rougeLsum_precision_stderr": 0.005019655012266495, "rougeLsum_recall": 0.1900917930098727, "rougeLsum_recall_stderr": 0.002103942567542897}}, "2": {"generate_text_restaurant": {"bleu": 2.603793232159829, "bleu_stderr": 0.08602086565258178, "rouge1_fmeasure": 0.24621485176179148, "rouge1_fmeasure_stderr": 0.0021298419718764295, "rouge1_precision": 0.4779868239362067, "rouge1_precision_stderr": 0.005553233194383071, "rouge1_recall": 0.2218408152039529, "rouge1_recall_stderr": 0.0025251357951490017, "rouge2_fmeasure": 0.07271898435902731, "rouge2_fmeasure_stderr": 0.0011983442669434524, "rouge2_precision": 0.28015507564420217, "rouge2_precision_stderr": 0.007006513823446057, "rouge2_recall": 0.06174287451325578, "rouge2_recall_stderr": 0.0011946652104605105, "rougeL_fmeasure": 0.18939633658626337, "rougeL_fmeasure_stderr": 0.0015242055814220488, "rougeL_precision": 0.41118978628391467, "rougeL_precision_stderr": 0.005955799665988279, "rougeL_recall": 0.1669828348723676, "rougeL_recall_stderr": 0.0018155749757754794, "rougeLsum_fmeasure": 0.20783964128412136, "rougeLsum_fmeasure_stderr": 0.0017715975694931382, "rougeLsum_precision": 0.43238831592921806, "rougeLsum_precision_stderr": 0.005826966992945048, "rougeLsum_recall": 0.18493553054709222, "rougeLsum_recall_stderr": 0.0020976632350393387}}, "3": {"generate_text_restaurant": {"bleu": 2.3209857063675314, "bleu_stderr": 0.12982076541178944, "rouge1_fmeasure": 0.23358145663602803, "rouge1_fmeasure_stderr": 0.0021197904747158307, "rouge1_precision": 0.5612866816665202, "rouge1_precision_stderr": 0.006129081036510009, "rouge1_recall": 0.20087508926536532, "rouge1_recall_stderr": 0.0025216399129215977, "rouge2_fmeasure": 0.07741264613129926, "rouge2_fmeasure_stderr": 0.0012143950767558279, "rouge2_precision": 0.3892125848686342, "rouge2_precision_stderr": 0.007925833706318395, "rouge2_recall": 0.061534987252114044, "rouge2_recall_stderr": 0.0011957852304613182, "rougeL_fmeasure": 0.18430304636118938, "rougeL_fmeasure_stderr": 0.0015018369060906252, "rougeL_precision": 0.5012871219001243, "rougeL_precision_stderr": 0.00667094404075784, "rougeL_recall": 0.15375895092781094, "rougeL_recall_stderr": 0.0017820390356310474, "rougeLsum_fmeasure": 0.20086572934525307, "rougeLsum_fmeasure_stderr": 0.0017513840380721597, "rougeLsum_precision": 0.5213850921639839, "rougeLsum_precision_stderr": 0.006496933622169016, "rougeLsum_recall": 0.16963147902413764, "rougeLsum_recall_stderr": 0.002077342539496508}}, "4": {"generate_text_restaurant": {"bleu": 2.2921990304850484, "bleu_stderr": 0.08543251982170741, "rouge1_fmeasure": 0.23318933610493578, "rouge1_fmeasure_stderr": 0.0020775767402760356, "rouge1_precision": 0.5796907087867259, "rouge1_precision_stderr": 0.006171454753728164, "rouge1_recall": 0.20016357064955265, "rouge1_recall_stderr": 0.002555704270475239, "rouge2_fmeasure": 0.0793602748827254, "rouge2_fmeasure_stderr": 0.0012046225439158768, "rouge2_precision": 0.41295355940107836, "rouge2_precision_stderr": 0.008022679828508067, "rouge2_recall": 0.06284240129070301, "rouge2_recall_stderr": 0.0012225472768686867, "rougeL_fmeasure": 0.18494319337588916, "rougeL_fmeasure_stderr": 0.0014693982609005228, "rougeL_precision": 0.5211414591118526, "rougeL_precision_stderr": 0.006749185053993893, "rougeL_recall": 0.15341379385275639, "rougeL_recall_stderr": 0.0017924618345547736, "rougeLsum_fmeasure": 0.20153553784002368, "rougeLsum_fmeasure_stderr": 0.001716474593973442, "rougeLsum_precision": 0.5412722798442585, "rougeLsum_precision_stderr": 0.006559752101552006, "rougeLsum_recall": 0.16952730489426254, "rougeLsum_recall_stderr": 0.0020957398904803224}}, "5": {"generate_text_restaurant": {"bleu": 1.925086755476535, "bleu_stderr": 0.07460606701647725, "rouge1_fmeasure": 0.2280447164700859, "rouge1_fmeasure_stderr": 0.0020473595981112917, "rouge1_precision": 0.5973757904695731, "rouge1_precision_stderr": 0.006271640540558927, "rouge1_recall": 0.1923538694713606, "rouge1_recall_stderr": 0.0024817861959041685, "rouge2_fmeasure": 0.07923731473143852, "rouge2_fmeasure_stderr": 0.0011758747556224521, "rouge2_precision": 0.4376950007402379, "rouge2_precision_stderr": 0.008112624696462126, "rouge2_recall": 0.06086131939525761, "rouge2_recall_stderr": 0.0011484366650911057, "rougeL_fmeasure": 0.18280675281871867, "rougeL_fmeasure_stderr": 0.0014662677675956494, "rougeL_precision": 0.541673436116618, "rougeL_precision_stderr": 0.0068486676204078355, "rougeL_recall": 0.14900691337572508, "rougeL_recall_stderr": 0.0017633515667638841, "rougeLsum_fmeasure": 0.1967172003563238, "rougeLsum_fmeasure_stderr": 0.001676113963181455, "rougeLsum_precision": 0.5586648028846495, "rougeLsum_precision_stderr": 0.006679295650040034, "rougeLsum_recall": 0.1623031921397378, "rougeLsum_recall_stderr": 0.0020094211700576207}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 0.7108334790337705, "bleu_stderr": 0.058162173996696875, "rouge1_fmeasure": 0.1301076426530973, "rouge1_fmeasure_stderr": 0.002005800630789775, "rouge1_precision": 0.0932531173603635, "rouge1_precision_stderr": 0.0015120250012112578, "rouge1_recall": 0.22658547801108042, "rouge1_recall_stderr": 0.0034238415570615605, "rouge2_fmeasure": 0.020193635492138703, "rouge2_fmeasure_stderr": 0.0009172009114471265, "rouge2_precision": 0.014274889687797594, "rouge2_precision_stderr": 0.0006537571125742924, "rouge2_recall": 0.03619983127433739, "rouge2_recall_stderr": 0.0016732561206435187, "rougeL_fmeasure": 0.11541860519452854, "rougeL_fmeasure_stderr": 0.001626381586865247, "rougeL_precision": 0.08248266489307994, "rougeL_precision_stderr": 0.0012123787530455208, "rougeL_recall": 0.20225938298416002, "rougeL_recall_stderr": 0.0028877983346722353, "rougeLsum_fmeasure": 0.09965479142654927, "rougeLsum_fmeasure_stderr": 0.0015562816394015851, "rougeLsum_precision": 0.07120007313928639, "rougeLsum_precision_stderr": 0.0011581187440043383, "rougeLsum_recall": 0.17499666394655364, "rougeLsum_recall_stderr": 0.0027595700656802896}}, "1": {"article_DOC_summary": {"bleu": 0.738240889840718, "bleu_stderr": 0.0868248085435632, "rouge1_fmeasure": 0.12218534954690298, "rouge1_fmeasure_stderr": 0.002086105792744862, "rouge1_precision": 0.08681874008608821, "rouge1_precision_stderr": 0.0015372287874623328, "rouge1_recall": 0.21531348981463982, "rouge1_recall_stderr": 0.003576186419830758, "rouge2_fmeasure": 0.01770150669759179, "rouge2_fmeasure_stderr": 0.0010270994950922852, "rouge2_precision": 0.012509124905331995, "rouge2_precision_stderr": 0.0007420823011256812, "rouge2_recall": 0.03157721578215402, "rouge2_recall_stderr": 0.001783836316836288, "rougeL_fmeasure": 0.10834326623147619, "rougeL_fmeasure_stderr": 0.001734887719558359, "rougeL_precision": 0.07688577272866007, "rougeL_precision_stderr": 0.0012789662686650827, "rougeL_recall": 0.19164427424805588, "rougeL_recall_stderr": 0.0030058089964809342, "rougeLsum_fmeasure": 0.0946683523065788, "rougeLsum_fmeasure_stderr": 0.0016444941052935398, "rougeLsum_precision": 0.06706908022443173, "rougeLsum_precision_stderr": 0.0012034239269148562, "rougeLsum_recall": 0.16838691034513611, "rougeLsum_recall_stderr": 0.002901367228885086}}, "2": {"article_DOC_summary": {"bleu": 0.6710588893303108, "bleu_stderr": 0.10334104691062508, "rouge1_fmeasure": 0.1176287038533819, "rouge1_fmeasure_stderr": 0.001972378143041748, "rouge1_precision": 0.08344848795431818, "rouge1_precision_stderr": 0.0014501263751190221, "rouge1_recall": 0.20826530640557234, "rouge1_recall_stderr": 0.0034504658033345666, "rouge2_fmeasure": 0.016220771292250878, "rouge2_fmeasure_stderr": 0.0009663493756094667, "rouge2_precision": 0.011371076245523942, "rouge2_precision_stderr": 0.0006862950154624032, "rouge2_recall": 0.029673978711314167, "rouge2_recall_stderr": 0.0017700340282045873, "rougeL_fmeasure": 0.10506213754165063, "rougeL_fmeasure_stderr": 0.0016611947382865833, "rougeL_precision": 0.07441186933301655, "rougeL_precision_stderr": 0.0012159583112653365, "rougeL_recall": 0.18674988097913103, "rougeL_recall_stderr": 0.0029513766424467984, "rougeLsum_fmeasure": 0.08963347056560689, "rougeLsum_fmeasure_stderr": 0.0015009503965992478, "rougeLsum_precision": 0.06339905265698148, "rougeLsum_precision_stderr": 0.001092340918395308, "rougeLsum_recall": 0.16003098813771494, "rougeLsum_recall_stderr": 0.0027073758950265613}}, "3": {"article_DOC_summary": {"bleu": 0.6735453686380013, "bleu_stderr": 0.07011141492068476, "rouge1_fmeasure": 0.11753538804203087, "rouge1_fmeasure_stderr": 0.0020836683386094837, "rouge1_precision": 0.08566757832306267, "rouge1_precision_stderr": 0.0016587474807451944, "rouge1_recall": 0.20357502479268652, "rouge1_recall_stderr": 0.003598302895199109, "rouge2_fmeasure": 0.01758783301448063, "rouge2_fmeasure_stderr": 0.0008993414378996701, "rouge2_precision": 0.012532666305751455, "rouge2_precision_stderr": 0.0006509268191738087, "rouge2_recall": 0.03174654587249015, "rouge2_recall_stderr": 0.001651560460066696, "rougeL_fmeasure": 0.10552480046986407, "rougeL_fmeasure_stderr": 0.0017553752294999442, "rougeL_precision": 0.07688005894422863, "rougeL_precision_stderr": 0.001413211781346706, "rougeL_recall": 0.1833705494755132, "rougeL_recall_stderr": 0.003075758477780908, "rougeLsum_fmeasure": 0.09039597582466691, "rougeLsum_fmeasure_stderr": 0.0015821613854120013, "rougeLsum_precision": 0.06597367200611912, "rougeLsum_precision_stderr": 0.0012975581502598706, "rougeLsum_recall": 0.15757361471543319, "rougeLsum_recall_stderr": 0.0028139783662738914}}, "4": {"article_DOC_summary": {"bleu": 0.3141890910742015, "bleu_stderr": 0.05421566585284457, "rouge1_fmeasure": 0.03459946242562895, "rouge1_fmeasure_stderr": 0.0020414383200811565, "rouge1_precision": 0.03132332576976698, "rouge1_precision_stderr": 0.0023004915067675346, "rouge1_recall": 0.053496538783602984, "rouge1_recall_stderr": 0.003144438540129738, "rouge2_fmeasure": 0.005457903769757144, "rouge2_fmeasure_stderr": 0.0006055871477864605, "rouge2_precision": 0.004519290679830224, "rouge2_precision_stderr": 0.0005984203253932537, "rouge2_recall": 0.008760932443251665, "rouge2_recall_stderr": 0.0009658740352901951, "rougeL_fmeasure": 0.02968085318772741, "rougeL_fmeasure_stderr": 0.0017092195309002798, "rougeL_precision": 0.026475030219787705, "rougeL_precision_stderr": 0.0018704914251495015, "rougeL_recall": 0.04652141598674097, "rougeL_recall_stderr": 0.002719893393898828, "rougeLsum_fmeasure": 0.026736187678505472, "rougeLsum_fmeasure_stderr": 0.0015717317320738274, "rougeLsum_precision": 0.024457443251700062, "rougeLsum_precision_stderr": 0.0018438226051541928, "rougeLsum_recall": 0.0414183937683806, "rougeLsum_recall_stderr": 0.002446421641025846}}, "5": {"article_DOC_summary": {"bleu": 1.7319051598815575e-38, "bleu_stderr": 2.403355708154714e-33, "rouge1_fmeasure": 0.0029510521900948873, "rouge1_fmeasure_stderr": 0.0007941713282393438, "rouge1_precision": 0.0033087355492285686, "rouge1_precision_stderr": 0.0009040554845808398, "rouge1_recall": 0.0027236205204102274, "rouge1_recall_stderr": 0.0007255225241620119, "rouge2_fmeasure": 0.00044926348899158137, "rouge2_fmeasure_stderr": 0.00023227082406786792, "rouge2_precision": 0.0004761124003632328, "rouge2_precision_stderr": 0.00024265317442198996, "rouge2_recall": 0.00043140614764249324, "rouge2_recall_stderr": 0.0002265423309937391, "rougeL_fmeasure": 0.0022017230619442475, "rougeL_fmeasure_stderr": 0.0005901195656983681, "rougeL_precision": 0.002457213814477674, "rougeL_precision_stderr": 0.000670321594996775, "rougeL_recall": 0.0020484761065232117, "rougeL_recall_stderr": 0.0005445612655945507, "rougeLsum_fmeasure": 0.0022521720580092257, "rougeLsum_fmeasure_stderr": 0.0006132441399757633, "rougeLsum_precision": 0.002507662810542652, "rougeLsum_precision_stderr": 0.000690750451785873, "rougeLsum_recall": 0.00209892510258819, "rougeLsum_recall_stderr": 0.0005695502767532305}}}} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_0.json b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..1cba58116566293a74dbe4c308a27c19f20f0f3f --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.2402533472331397, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.026185917923928877 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05635234796714399, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001189715463938905 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.24477723820256925, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003911408477735492 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08695988732670223, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016533104433194333 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.026609175713836197, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.000748423797004803 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.11897303489272501, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0027873552881508687 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.041180837536686735, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.001064310413346249 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.05474668459996981, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001127997358624829 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.23999150640210568, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0038562042868130783 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.08469132973494022, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0015838300470463675 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.05402951821779291, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0011140410965541283 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.23635512584590726, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003711184986500599 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.08351466385050439, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015531618940798594 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_1.json b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..efae60da420a3bf2e3b31ccc0bea6cee853d7b46 --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.1966511475591434, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.019961027192588972 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.052203096200997605, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0011814199935971487 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.23624534743367054, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004104260780661984 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.0803004872190622, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001586565623921053 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.022905553064196095, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006616729566122753 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10534165215136057, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0027047796932073874 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03561113397442626, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009654261821009468 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.05070197915896016, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011402093293298914 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.2308796113126565, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004043545665546455 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07810279314923733, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0015357417322684717 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04981348394373528, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.001121609606106111 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.22542947047472042, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0038408108647505646 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07662533362763059, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014990302285155417 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_2.json b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..0a48281865ddec6f58f290bc20cd547f59d26530 --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.1884219412086828, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.023296081112241036 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05126483868300737, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0012289835089092538 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.22485099423647667, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003951756351109295 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07799553963261191, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016116099948632317 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.021910691837222048, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006674972683578638 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.09868793885112478, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00267972974482592 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.033996398337902196, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009807944540453622 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.049270188912098695, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001146672870138461 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21853634876925923, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003858236649166037 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07530843496272169, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0015377151750192398 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04892666044601755, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0011499291214700047 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.2158681859950873, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0037878816597245856 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07464177043046383, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015299665214804118 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_3.json b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..200541bac56a3a687f2fb36ae333e8533bd0907c --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.17480172226443813, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.020902481247421167 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05022658854957582, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0011483006429375308 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.21766011266118138, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0037980984559104767 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07671301972464661, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015827129539320492 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.021098768311532776, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006649377981455256 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.09361071943796677, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002581939249689689 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03267526954798376, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009673182645874134 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.048481590771377314, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010930663253747802 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21150576343847308, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003717086333074857 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07419860097105505, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0015205147299916612 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04811636722846188, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.00108771889241242 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.2091653043239388, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0036421804771378727 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07356401480470928, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015070391320228822 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_4.json b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..064fa4e302cb3a52728d202648476c36c8e350be --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.16911875641565555, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.021517687857935563 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05006343522986615, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0011889599388540328 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.21681139544905903, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003967685741672584 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07623947677441675, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015917618353225786 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.021014766190286683, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006570715082182744 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.09469441839577937, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002639071328542525 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.032460233176363706, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009488458515182864 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.04817824516534705, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011215777709926644 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21048516120467528, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0038835744921377115 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.0736245369022538, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001532311260104304 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.047823848011317574, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0011179984956813653 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.20800373325037153, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0037694527974004455 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07293428533299331, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015070742072700732 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_5.json b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..6d4945e311220b0b7770c4692410116623c15b63 --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.1651739066359778, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.013582380328803895 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05002434980801082, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0013304410605971587 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.2119514145396548, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003904652175945565 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07469945223979003, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001602258696177567 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.020755809373804224, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006578602827990767 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.09202921529905005, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0025807353374024083 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.031831586296822034, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009440380624300895 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.04832503657773521, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001264896440171536 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.20672314448320298, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0038314629173716295 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07248842677611263, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0015461556121857058 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04803645077862967, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0012672018364395272 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.20482117124374777, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0037387249354123263 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07186377941809288, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001522991746384243 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_0.json b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..00932a130912464bdca44a4fdd64570fe8b4ffd7 --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.08896264475290212, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0014961453658771753 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.14766114503997804, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0022853382090377147 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.10235628004621973, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015352719576797514 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.010914932229793938, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.000442574617725981 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.02021566730440207, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0009776391473893518 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.012908343025675926, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0005189004993993455 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.07872520868819172, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0012274871084527635 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.13301293352517277, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.001988783940835733 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.09105207273135195, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0012583390168369203 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.08310192751116388, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0013861257331348126 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.1382205228298101, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0021125660441938714 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.09562117669012847, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014141072084287178 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.5931231989483277, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.031434461936734784 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_1.json b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..8988abbdd523397453e9980f4897630c6722ff4e --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.10055265873683499, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0014823849508849192 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.15721964279607673, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002101087218671987 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.11341944639978474, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014850357580665201 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.009009737162697751, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00040444338000367907 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.015105939700786192, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0007779189021910072 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.010373400181558124, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00046534552341471005 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.07670911618729974, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010177180705200342 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.12370483862247518, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0016084886833877196 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.08720424199215626, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0010144699788013974 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.09497668109572749, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0013881457025051664 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.14924028152165453, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.001990774352437416 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.10723368799120181, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001385283510429042 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.5661195601128765, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.02509943251177783 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_2.json b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..1232e5ad48ce6050d5fd53832d0dce995c79d1e5 --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.09850769820039676, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001393866950191024 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.15728022214901077, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0021264405312242268 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.11178095357846812, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014024615137554158 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.008059435228204459, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00036724418176074656 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.014751654998446549, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0008627403125755772 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.00948402555732701, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00043499129797635845 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.07543298554631261, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0009710151708614845 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.12367943931212322, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0016565085405082212 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.08613436213947881, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.000973897059590453 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.09309972440355573, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0013076759579022557 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.14900708831404136, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.002010905961714977 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.10564492116920021, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.00130869570379961 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.4817096842042167, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.032151798067684666 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_3.json b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..082199ed9b37d3d436db64c814ff9b125da8375a --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.08876189997465327, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0016524443908080243 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.13798843229202282, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002362366437072347 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.09726354234545957, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015677513232408066 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.00898713301028085, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00044025120761783846 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.015915833954570136, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0009534645333032286 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.010098316763211473, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0004715818643641205 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.06967104181936852, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0012637053121464842 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.11083894321654772, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0019133053692465861 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.07641663175251441, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0011479011013677293 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.0834028388857906, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0015480707472114237 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.13022096210702444, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0022276479151398013 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.09143437561036061, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014579716026386462 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.6431613954247507, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.04521270308701262 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_4.json b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..ea94c6dd5eab99efb2e6699592073ff0f00d8b91 --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.03231162663905748, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0014029192086070055 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.04886975678455771, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0020406266432912625 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.03397615224146499, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0013413090911707363 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.004316882219286408, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00040303179947527026 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.007567975619407399, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00072019577146174 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.004657543429312446, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0003732961860283337 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.025705241800145585, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011151401955078694 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.039440132597804066, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0016572672525447662 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.026926105397227285, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0010332439990417993 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.030117595791150212, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0013111889551577585 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.04548969274809294, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0018932906972936455 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.03157352959819198, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0012380063392149546 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.20685775778522772, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.022695897056587335 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_5.json b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..d3d9c31947a0e8858e850c1d0ea640d770bd2ca8 --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.004848063463391125, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.000536192278674931 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.007935073724408357, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0009002006058473917 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.00512854488886217, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.000538151396940047 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.0005679553743524742, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00011257703068984332 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.001098140505376618, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00024257176231275518 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.0006648666537942359, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00012786524814426142 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.003902270744914557, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.000423672666149028 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.006533208860473738, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0007584316058423692 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.00413219989090764, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.00042547479499473683 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.0046299381082077675, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.000508666379442921 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.007589744194655164, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0008608249815724205 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.004904311688657879, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0005139320810319882 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 7.998017663218444e-07, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 1.5582594593529686e-06 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_0.json b/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..be756ac2e1b3b33b18c464ffa4bbb7ac53c8137f --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 1.5942448530297823e-290, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 1.6666079809877698e-121 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.0003518518518518518, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0003335054684218052 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.0001858974358974359, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.00010431359184166816 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.00012092550216120011, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 9.09013091480143e-05 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.0003333333333333333, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0003333333333333273 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 2.7777777777777776e-05, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 2.7777777777776627e-05 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 5.1282051282051286e-05, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 5.128205128205036e-05 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.0003518518518518518, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0003335054684218052 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.0001858974358974359, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.00010431359184166816 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.00012092550216120011, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 9.09013091480143e-05 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.0003518518518518518, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0003335054684218052 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.0001858974358974359, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.00010431359184166816 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.00012092550216120011, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 9.09013091480143e-05 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_1.json b/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..c0038b33a19c2aba6fe5e7b516a7eccbb096dfb7 --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 3.0108167948149323, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.10792382018960417 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.4150584847267254, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.00486094852985764 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.23018252026731417, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002512865906826812 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.25134930784146337, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0022111067502175144 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.20083802263830847, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.005774843254349016 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.06311833340661611, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.001259682053367179 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.07157056272296036, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0012898762177629953 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.3404916823152853, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.005065217097990015 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.17002953950443964, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.001829503746638702 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.18851170439051954, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0016248128513782103 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.3655248997617407, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.005019655012266495 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.1900917930098727, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002103942567542897 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.20954840481881282, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0018787543803962393 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_2.json b/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..5b373e2a595af66c6fc90a7ecc08babe97193719 --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 2.603793232159829, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.08602086565258178 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.4779868239362067, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.005553233194383071 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.2218408152039529, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0025251357951490017 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.24621485176179148, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0021298419718764295 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.28015507564420217, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.007006513823446057 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.06174287451325578, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0011946652104605105 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.07271898435902731, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0011983442669434524 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.41118978628391467, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.005955799665988279 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.1669828348723676, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0018155749757754794 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.18939633658626337, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0015242055814220488 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.43238831592921806, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.005826966992945048 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.18493553054709222, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0020976632350393387 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.20783964128412136, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0017715975694931382 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_3.json b/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..dfd940fab81ae683dfa15a8a131766ecb1eddb1d --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 2.3209857063675314, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.12982076541178944 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.5612866816665202, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.006129081036510009 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.20087508926536532, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0025216399129215977 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.23358145663602803, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0021197904747158307 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.3892125848686342, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.007925833706318395 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.061534987252114044, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0011957852304613182 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.07741264613129926, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0012143950767558279 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.5012871219001243, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.00667094404075784 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.15375895092781094, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0017820390356310474 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.18430304636118938, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0015018369060906252 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.5213850921639839, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.006496933622169016 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.16963147902413764, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002077342539496508 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.20086572934525307, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0017513840380721597 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_4.json b/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 0000000000000000000000000000000000000000..6645cab616591901896167afcc70aa7f11f8467e --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 2.2921990304850484, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.08543251982170741 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.5796907087867259, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.006171454753728164 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.20016357064955265, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002555704270475239 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.23318933610493578, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0020775767402760356 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.41295355940107836, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.008022679828508067 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.06284240129070301, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0012225472768686867 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.0793602748827254, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0012046225439158768 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.5211414591118526, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.006749185053993893 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.15341379385275639, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0017924618345547736 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.18494319337588916, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0014693982609005228 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.5412722798442585, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.006559752101552006 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.16952730489426254, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0020957398904803224 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.20153553784002368, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001716474593973442 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_5.json b/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..099ff0c3126d8a4cd8794e15a2c140a2f785e580 --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 1.925086755476535, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.07460606701647725 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.5973757904695731, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.006271640540558927 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.1923538694713606, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0024817861959041685 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.2280447164700859, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0020473595981112917 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.4376950007402379, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.008112624696462126 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.06086131939525761, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0011484366650911057 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.07923731473143852, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0011758747556224521 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.541673436116618, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0068486676204078355 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.14900691337572508, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0017633515667638841 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.18280675281871867, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0014662677675956494 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.5586648028846495, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.006679295650040034 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.1623031921397378, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0020094211700576207 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.1967172003563238, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001676113963181455 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_0.json b/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..ee476d3adb47d50e17f693d6e4be645c2c68d8fb --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.0932531173603635, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0015120250012112578 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.22658547801108042, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0034238415570615605 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.1301076426530973, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002005800630789775 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.014274889687797594, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006537571125742924 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.03619983127433739, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0016732561206435187 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.020193635492138703, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0009172009114471265 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.08248266489307994, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012123787530455208 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.20225938298416002, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0028877983346722353 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.11541860519452854, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001626381586865247 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.07120007313928639, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0011581187440043383 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.17499666394655364, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0027595700656802896 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.09965479142654927, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015562816394015851 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.7108334790337705, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.058162173996696875 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_1.json b/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..cbcc6f571787ae59a9c01eeb988211885e0d3f2d --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08681874008608821, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0015372287874623328 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.21531348981463982, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003576186419830758 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.12218534954690298, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002086105792744862 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.012509124905331995, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007420823011256812 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.03157721578215402, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.001783836316836288 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.01770150669759179, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0010270994950922852 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.07688577272866007, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012789662686650827 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.19164427424805588, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0030058089964809342 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.10834326623147619, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001734887719558359 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06706908022443173, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0012034239269148562 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.16838691034513611, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002901367228885086 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.0946683523065788, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0016444941052935398 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.738240889840718, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.0868248085435632 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_2.json b/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..ab76233d75b47f685e40aded14c5f00cb7fd0e56 --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08344848795431818, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0014501263751190221 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.20826530640557234, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0034504658033345666 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.1176287038533819, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.001972378143041748 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.011371076245523942, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006862950154624032 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.029673978711314167, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0017700340282045873 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.016220771292250878, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0009663493756094667 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.07441186933301655, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012159583112653365 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.18674988097913103, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0029513766424467984 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.10506213754165063, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0016611947382865833 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06339905265698148, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.001092340918395308 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.16003098813771494, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0027073758950265613 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.08963347056560689, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015009503965992478 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.6710588893303108, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.10334104691062508 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_3.json b/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..56d672d5df90df191f2db363badf589df27f0473 --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08566757832306267, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0016587474807451944 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.20357502479268652, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003598302895199109 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.11753538804203087, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0020836683386094837 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.012532666305751455, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006509268191738087 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.03174654587249015, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.001651560460066696 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.01758783301448063, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0008993414378996701 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.07688005894422863, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.001413211781346706 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.1833705494755132, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.003075758477780908 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.10552480046986407, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017553752294999442 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06597367200611912, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0012975581502598706 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.15757361471543319, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0028139783662738914 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.09039597582466691, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015821613854120013 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.6735453686380013, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.07011141492068476 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_4.json b/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..3c8a65291234e2e6e3a2c59863f658c051152b78 --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.03132332576976698, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0023004915067675346 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.053496538783602984, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003144438540129738 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.03459946242562895, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0020414383200811565 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.004519290679830224, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0005984203253932537 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.008760932443251665, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0009658740352901951 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.005457903769757144, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0006055871477864605 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.026475030219787705, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0018704914251495015 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.04652141598674097, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002719893393898828 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.02968085318772741, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017092195309002798 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.024457443251700062, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0018438226051541928 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.0414183937683806, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002446421641025846 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.026736187678505472, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015717317320738274 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.3141890910742015, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.05421566585284457 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_5.json b/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..d423e2d809b8987809eb0056c04b50855203d9eb --- /dev/null +++ b/83m20b20b/evaluation/generation/slim.83m20b20b_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.0033087355492285686, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0009040554845808398 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.0027236205204102274, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0007255225241620119 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.0029510521900948873, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0007941713282393438 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.0004761124003632328, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.00024265317442198996 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.00043140614764249324, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0002265423309937391 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.00044926348899158137, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.00023227082406786792 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.002457213814477674, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.000670321594996775 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.0020484761065232117, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0005445612655945507 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.0022017230619442475, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0005901195656983681 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.002507662810542652, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.000690750451785873 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.00209892510258819, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0005695502767532305 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.0022521720580092257, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0006132441399757633 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 1.7319051598815575e-38, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 2.403355708154714e-33 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-83m-20b/83m20b20b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/rankeval/83m20b20b_0.csv b/83m20b20b/evaluation/rankeval/83m20b20b_0.csv new file mode 100644 index 0000000000000000000000000000000000000000..960ec7279d278e6ea107c60f8c3f0ab1fbcfb312 --- /dev/null +++ b/83m20b20b/evaluation/rankeval/83m20b20b_0.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.348,0.015070604603768408,0 +anli_r2,acc,0.344,0.015029633724408943,0 +anli_r3,acc,0.3325,0.013605417345710526,0 +arc_challenge,acc,0.18344709897610922,0.01131017017955454,0 +arc_challenge,acc_norm,0.2235494880546075,0.012174896631202607,0 +arc_easy,acc,0.3728956228956229,0.009922743197129241,0 +arc_easy,acc_norm,0.3514309764309764,0.00979639558281772,0 +boolq,acc,0.6201834862385321,0.008488668235778611,1 +cb,acc,0.32142857142857145,0.06297362289056341,1 +cb,f1,0.22212310437109728,,1 +copa,acc,0.61,0.04902071300001975,0 +hellaswag,acc,0.27155945030870343,0.004438549152538038,0 +hellaswag,acc_norm,0.27693686516630156,0.00446570481089354,0 +piqa,acc,0.6180631120783461,0.011335942557505231,0 +piqa,acc_norm,0.6055495103373232,0.011402931101558378,0 +rte,acc,0.51985559566787,0.030072723167317177,0 +sciq,acc,0.628,0.015292149942040577,0 +sciq,acc_norm,0.557,0.0157161699532041,0 +storycloze_2016,acc,0.55264564404062,0.011498161586686654,0 +winogrande,acc,0.5295974743488555,0.014027843827840086,0 diff --git a/83m20b20b/evaluation/rankeval/83m20b20b_0.json b/83m20b20b/evaluation/rankeval/83m20b20b_0.json new file mode 100644 index 0000000000000000000000000000000000000000..d8507d707cba6c2e88ca66ece54ecf27ffe04a9f --- /dev/null +++ b/83m20b20b/evaluation/rankeval/83m20b20b_0.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.348, + "acc_stderr": 0.015070604603768408 + }, + "anli_r2": { + "acc": 0.344, + "acc_stderr": 0.015029633724408943 + }, + "anli_r3": { + "acc": 0.3325, + "acc_stderr": 0.013605417345710526 + }, + "cb": { + "acc": 0.32142857142857145, + "acc_stderr": 0.06297362289056341, + "f1": 0.22212310437109728 + }, + "copa": { + "acc": 0.61, + "acc_stderr": 0.04902071300001975 + }, + "hellaswag": { + "acc": 0.27155945030870343, + "acc_stderr": 0.004438549152538038, + "acc_norm": 0.27693686516630156, + "acc_norm_stderr": 0.00446570481089354 + }, + "rte": { + "acc": 0.51985559566787, + "acc_stderr": 0.030072723167317177 + }, + "winogrande": { + "acc": 0.5295974743488555, + "acc_stderr": 0.014027843827840086 + }, + "storycloze_2016": { + "acc": 0.55264564404062, + "acc_stderr": 0.011498161586686654 + }, + "boolq": { + "acc": 0.6201834862385321, + "acc_stderr": 0.008488668235778611 + }, + "arc_easy": { + "acc": 0.3728956228956229, + "acc_stderr": 0.009922743197129241, + "acc_norm": 0.3514309764309764, + "acc_norm_stderr": 0.00979639558281772 + }, + "arc_challenge": { + "acc": 0.18344709897610922, + "acc_stderr": 0.01131017017955454, + "acc_norm": 0.2235494880546075, + "acc_norm_stderr": 0.012174896631202607 + }, + "sciq": { + "acc": 0.628, + "acc_stderr": 0.015292149942040577, + "acc_norm": 0.557, + "acc_norm_stderr": 0.0157161699532041 + }, + "piqa": { + "acc": 0.6180631120783461, + "acc_stderr": 0.011335942557505231, + "acc_norm": 0.6055495103373232, + "acc_norm_stderr": 0.011402931101558378 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/rankeval/83m20b20b_1.csv b/83m20b20b/evaluation/rankeval/83m20b20b_1.csv new file mode 100644 index 0000000000000000000000000000000000000000..018c3e84291db7a4ae6eeb6518125b2d6631c8a3 --- /dev/null +++ b/83m20b20b/evaluation/rankeval/83m20b20b_1.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.323,0.014794927843348635,0 +anli_r2,acc,0.314,0.014683991951087966,0 +anli_r3,acc,0.3458333333333333,0.013736245342311014,0 +arc_challenge,acc,0.17918088737201365,0.011207045216615665,0 +arc_challenge,acc_norm,0.21331058020477817,0.011970971742326334,0 +arc_easy,acc,0.3661616161616162,0.009885391390947719,0 +arc_easy,acc_norm,0.35395622895622897,0.009812370644174421,0 +boolq,acc,0.5214067278287462,0.008737036492417067,1 +cb,acc,0.4642857142857143,0.06724777654937658,1 +cb,f1,0.3251915708812261,,1 +copa,acc,0.6,0.049236596391733084,0 +hellaswag,acc,0.2733519219279028,0.004447691405592722,0 +hellaswag,acc_norm,0.27912766381198967,0.004476536569056587,0 +piqa,acc,0.6115342763873776,0.011371877593210249,0 +piqa,acc_norm,0.5963003264417845,0.011447407541749086,0 +rte,acc,0.5342960288808665,0.030025579819366422,0 +sciq,acc,0.62,0.015356947477797582,0 +sciq,acc_norm,0.558,0.015712507211864214,0 +storycloze_2016,acc,0.5494388027792624,0.011505771738769861,0 +winogrande,acc,0.5335438042620363,0.014020826677598103,0 diff --git a/83m20b20b/evaluation/rankeval/83m20b20b_1.json b/83m20b20b/evaluation/rankeval/83m20b20b_1.json new file mode 100644 index 0000000000000000000000000000000000000000..4f1adbe6055b808859465aaa893df7589a552a6a --- /dev/null +++ b/83m20b20b/evaluation/rankeval/83m20b20b_1.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.323, + "acc_stderr": 0.014794927843348635 + }, + "anli_r2": { + "acc": 0.314, + "acc_stderr": 0.014683991951087966 + }, + "anli_r3": { + "acc": 0.3458333333333333, + "acc_stderr": 0.013736245342311014 + }, + "cb": { + "acc": 0.4642857142857143, + "acc_stderr": 0.06724777654937658, + "f1": 0.3251915708812261 + }, + "copa": { + "acc": 0.6, + "acc_stderr": 0.049236596391733084 + }, + "hellaswag": { + "acc": 0.2733519219279028, + "acc_stderr": 0.004447691405592722, + "acc_norm": 0.27912766381198967, + "acc_norm_stderr": 0.004476536569056587 + }, + "rte": { + "acc": 0.5342960288808665, + "acc_stderr": 0.030025579819366422 + }, + "winogrande": { + "acc": 0.5335438042620363, + "acc_stderr": 0.014020826677598103 + }, + "storycloze_2016": { + "acc": 0.5494388027792624, + "acc_stderr": 0.011505771738769861 + }, + "boolq": { + "acc": 0.5214067278287462, + "acc_stderr": 0.008737036492417067 + }, + "arc_easy": { + "acc": 0.3661616161616162, + "acc_stderr": 0.009885391390947719, + "acc_norm": 0.35395622895622897, + "acc_norm_stderr": 0.009812370644174421 + }, + "arc_challenge": { + "acc": 0.17918088737201365, + "acc_stderr": 0.011207045216615665, + "acc_norm": 0.21331058020477817, + "acc_norm_stderr": 0.011970971742326334 + }, + "sciq": { + "acc": 0.62, + "acc_stderr": 0.015356947477797582, + "acc_norm": 0.558, + "acc_norm_stderr": 0.015712507211864214 + }, + "piqa": { + "acc": 0.6115342763873776, + "acc_stderr": 0.011371877593210249, + "acc_norm": 0.5963003264417845, + "acc_norm_stderr": 0.011447407541749086 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/rankeval/83m20b20b_2.csv b/83m20b20b/evaluation/rankeval/83m20b20b_2.csv new file mode 100644 index 0000000000000000000000000000000000000000..5382a5666adc4faa6d55bc33cfc1380fe493e1d7 --- /dev/null +++ b/83m20b20b/evaluation/rankeval/83m20b20b_2.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.327,0.014842213153411249,0 +anli_r2,acc,0.329,0.01486539538592837,0 +anli_r3,acc,0.35583333333333333,0.013826518748493314,0 +arc_challenge,acc,0.17235494880546076,0.011037113093461295,0 +arc_challenge,acc_norm,0.21331058020477817,0.011970971742326334,0 +arc_easy,acc,0.359006734006734,0.009843424713072176,0 +arc_easy,acc_norm,0.33964646464646464,0.00971784562868747,0 +boolq,acc,0.5746177370030581,0.008647125100676093,1 +cb,acc,0.48214285714285715,0.06737697508644648,1 +cb,f1,0.3268398268398269,,1 +copa,acc,0.57,0.049756985195624284,0 +hellaswag,acc,0.2698665604461263,0.004429831152914679,0 +hellaswag,acc_norm,0.2755427205735909,0.004458742356237901,0 +piqa,acc,0.6022850924918389,0.011419114133117223,0 +piqa,acc_norm,0.6006528835690969,0.011427006685027255,0 +rte,acc,0.5018050541516246,0.030096267148976633,0 +sciq,acc,0.611,0.015424555647308493,0 +sciq,acc_norm,0.573,0.015649789644462217,0 +storycloze_2016,acc,0.5451630144307856,0.011515167912227989,0 +winogrande,acc,0.5153906866614049,0.014045826789783665,0 diff --git a/83m20b20b/evaluation/rankeval/83m20b20b_2.json b/83m20b20b/evaluation/rankeval/83m20b20b_2.json new file mode 100644 index 0000000000000000000000000000000000000000..5d955ae867b0eb26036d60d22a920630c231cdfe --- /dev/null +++ b/83m20b20b/evaluation/rankeval/83m20b20b_2.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.327, + "acc_stderr": 0.014842213153411249 + }, + "anli_r2": { + "acc": 0.329, + "acc_stderr": 0.01486539538592837 + }, + "anli_r3": { + "acc": 0.35583333333333333, + "acc_stderr": 0.013826518748493314 + }, + "cb": { + "acc": 0.48214285714285715, + "acc_stderr": 0.06737697508644648, + "f1": 0.3268398268398269 + }, + "copa": { + "acc": 0.57, + "acc_stderr": 0.049756985195624284 + }, + "hellaswag": { + "acc": 0.2698665604461263, + "acc_stderr": 0.004429831152914679, + "acc_norm": 0.2755427205735909, + "acc_norm_stderr": 0.004458742356237901 + }, + "rte": { + "acc": 0.5018050541516246, + "acc_stderr": 0.030096267148976633 + }, + "winogrande": { + "acc": 0.5153906866614049, + "acc_stderr": 0.014045826789783665 + }, + "storycloze_2016": { + "acc": 0.5451630144307856, + "acc_stderr": 0.011515167912227989 + }, + "boolq": { + "acc": 0.5746177370030581, + "acc_stderr": 0.008647125100676093 + }, + "arc_easy": { + "acc": 0.359006734006734, + "acc_stderr": 0.009843424713072176, + "acc_norm": 0.33964646464646464, + "acc_norm_stderr": 0.00971784562868747 + }, + "arc_challenge": { + "acc": 0.17235494880546076, + "acc_stderr": 0.011037113093461295, + "acc_norm": 0.21331058020477817, + "acc_norm_stderr": 0.011970971742326334 + }, + "sciq": { + "acc": 0.611, + "acc_stderr": 0.015424555647308493, + "acc_norm": 0.573, + "acc_norm_stderr": 0.015649789644462217 + }, + "piqa": { + "acc": 0.6022850924918389, + "acc_stderr": 0.011419114133117223, + "acc_norm": 0.6006528835690969, + "acc_norm_stderr": 0.011427006685027255 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/rankeval/83m20b20b_3.csv b/83m20b20b/evaluation/rankeval/83m20b20b_3.csv new file mode 100644 index 0000000000000000000000000000000000000000..0b691567e5333354966d011d1c4194036f86e66f --- /dev/null +++ b/83m20b20b/evaluation/rankeval/83m20b20b_3.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.314,0.014683991951087976,0 +anli_r2,acc,0.37,0.015275252316519362,0 +anli_r3,acc,0.3408333333333333,0.013688600793296939,0 +arc_challenge,acc,0.17491467576791808,0.011101562501828222,0 +arc_challenge,acc_norm,0.20733788395904437,0.011846905782971382,0 +arc_easy,acc,0.3547979797979798,0.009817629113069694,0 +arc_easy,acc_norm,0.34385521885521886,0.009746660584852448,0 +boolq,acc,0.5798165137614679,0.008632912118872552,1 +cb,acc,0.4107142857142857,0.06633634150359541,1 +cb,f1,0.2837426900584795,,1 +copa,acc,0.59,0.049431107042371025,0 +hellaswag,acc,0.2725552678749253,0.0044436393941774234,0 +hellaswag,acc_norm,0.2740489942242581,0.004451222241494051,0 +piqa,acc,0.6011969532100109,0.011424390545037282,0 +piqa,acc_norm,0.5935799782372143,0.011459682650123582,0 +rte,acc,0.49458483754512633,0.030094698123239966,0 +sciq,acc,0.622,0.015341165254026649,0 +sciq,acc_norm,0.56,0.01570498795436179,0 +storycloze_2016,acc,0.5462319615179049,0.011512899199863032,0 +winogrande,acc,0.5067087608524072,0.014051220692330356,0 diff --git a/83m20b20b/evaluation/rankeval/83m20b20b_3.json b/83m20b20b/evaluation/rankeval/83m20b20b_3.json new file mode 100644 index 0000000000000000000000000000000000000000..38625930b1f98c44be6571e4d8087914631e7582 --- /dev/null +++ b/83m20b20b/evaluation/rankeval/83m20b20b_3.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.314, + "acc_stderr": 0.014683991951087976 + }, + "anli_r2": { + "acc": 0.37, + "acc_stderr": 0.015275252316519362 + }, + "anli_r3": { + "acc": 0.3408333333333333, + "acc_stderr": 0.013688600793296939 + }, + "cb": { + "acc": 0.4107142857142857, + "acc_stderr": 0.06633634150359541, + "f1": 0.2837426900584795 + }, + "copa": { + "acc": 0.59, + "acc_stderr": 0.049431107042371025 + }, + "hellaswag": { + "acc": 0.2725552678749253, + "acc_stderr": 0.0044436393941774234, + "acc_norm": 0.2740489942242581, + "acc_norm_stderr": 0.004451222241494051 + }, + "rte": { + "acc": 0.49458483754512633, + "acc_stderr": 0.030094698123239966 + }, + "winogrande": { + "acc": 0.5067087608524072, + "acc_stderr": 0.014051220692330356 + }, + "storycloze_2016": { + "acc": 0.5462319615179049, + "acc_stderr": 0.011512899199863032 + }, + "boolq": { + "acc": 0.5798165137614679, + "acc_stderr": 0.008632912118872552 + }, + "arc_easy": { + "acc": 0.3547979797979798, + "acc_stderr": 0.009817629113069694, + "acc_norm": 0.34385521885521886, + "acc_norm_stderr": 0.009746660584852448 + }, + "arc_challenge": { + "acc": 0.17491467576791808, + "acc_stderr": 0.011101562501828222, + "acc_norm": 0.20733788395904437, + "acc_norm_stderr": 0.011846905782971382 + }, + "sciq": { + "acc": 0.622, + "acc_stderr": 0.015341165254026649, + "acc_norm": 0.56, + "acc_norm_stderr": 0.01570498795436179 + }, + "piqa": { + "acc": 0.6011969532100109, + "acc_stderr": 0.011424390545037282, + "acc_norm": 0.5935799782372143, + "acc_norm_stderr": 0.011459682650123582 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/rankeval/83m20b20b_4.csv b/83m20b20b/evaluation/rankeval/83m20b20b_4.csv new file mode 100644 index 0000000000000000000000000000000000000000..3da3542325535be6298131a53fa64a3dc60c7959 --- /dev/null +++ b/83m20b20b/evaluation/rankeval/83m20b20b_4.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.341,0.014998131348402716,0 +anli_r2,acc,0.348,0.01507060460376841,0 +anli_r3,acc,0.3358333333333333,0.013639261190932887,0 +arc_challenge,acc,0.1766211604095563,0.011144042769316503,0 +arc_challenge,acc_norm,0.21075085324232082,0.011918271754852187,0 +arc_easy,acc,0.3451178451178451,0.009755139387152027,0 +arc_easy,acc_norm,0.3442760942760943,0.009749495321590815,0 +boolq,acc,0.5871559633027523,0.00861117243047287,1 +cb,acc,0.4107142857142857,0.06633634150359541,1 +cb,f1,0.2832151300236407,,1 +copa,acc,0.55,0.04999999999999999,0 +hellaswag,acc,0.27126070503883687,0.004437016600956918,0 +hellaswag,acc_norm,0.27604062935670187,0.004461235175488312,0 +piqa,acc,0.6142546245919478,0.011357166777524042,0 +piqa,acc_norm,0.5935799782372143,0.011459682650123579,0 +rte,acc,0.4548736462093863,0.029973636495415252,0 +sciq,acc,0.619,0.015364734787007436,0 +sciq,acc_norm,0.573,0.01564978964446222,0 +storycloze_2016,acc,0.5451630144307856,0.011515167912227987,0 +winogrande,acc,0.5114443567482242,0.014048804199859332,0 diff --git a/83m20b20b/evaluation/rankeval/83m20b20b_4.json b/83m20b20b/evaluation/rankeval/83m20b20b_4.json new file mode 100644 index 0000000000000000000000000000000000000000..04a48c166b6198085defc9b051fa4150aafe9c79 --- /dev/null +++ b/83m20b20b/evaluation/rankeval/83m20b20b_4.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.341, + "acc_stderr": 0.014998131348402716 + }, + "anli_r2": { + "acc": 0.348, + "acc_stderr": 0.01507060460376841 + }, + "anli_r3": { + "acc": 0.3358333333333333, + "acc_stderr": 0.013639261190932887 + }, + "cb": { + "acc": 0.4107142857142857, + "acc_stderr": 0.06633634150359541, + "f1": 0.2832151300236407 + }, + "copa": { + "acc": 0.55, + "acc_stderr": 0.04999999999999999 + }, + "hellaswag": { + "acc": 0.27126070503883687, + "acc_stderr": 0.004437016600956918, + "acc_norm": 0.27604062935670187, + "acc_norm_stderr": 0.004461235175488312 + }, + "rte": { + "acc": 0.4548736462093863, + "acc_stderr": 0.029973636495415252 + }, + "winogrande": { + "acc": 0.5114443567482242, + "acc_stderr": 0.014048804199859332 + }, + "storycloze_2016": { + "acc": 0.5451630144307856, + "acc_stderr": 0.011515167912227987 + }, + "boolq": { + "acc": 0.5871559633027523, + "acc_stderr": 0.00861117243047287 + }, + "arc_easy": { + "acc": 0.3451178451178451, + "acc_stderr": 0.009755139387152027, + "acc_norm": 0.3442760942760943, + "acc_norm_stderr": 0.009749495321590815 + }, + "arc_challenge": { + "acc": 0.1766211604095563, + "acc_stderr": 0.011144042769316503, + "acc_norm": 0.21075085324232082, + "acc_norm_stderr": 0.011918271754852187 + }, + "sciq": { + "acc": 0.619, + "acc_stderr": 0.015364734787007436, + "acc_norm": 0.573, + "acc_norm_stderr": 0.01564978964446222 + }, + "piqa": { + "acc": 0.6142546245919478, + "acc_stderr": 0.011357166777524042, + "acc_norm": 0.5935799782372143, + "acc_norm_stderr": 0.011459682650123579 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/83m20b20b/evaluation/rankeval/83m20b20b_5.csv b/83m20b20b/evaluation/rankeval/83m20b20b_5.csv new file mode 100644 index 0000000000000000000000000000000000000000..a8e053a321baea2065cb069c1b6b198ffc712242 --- /dev/null +++ b/83m20b20b/evaluation/rankeval/83m20b20b_5.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.346,0.015050266127564441,0 +anli_r2,acc,0.332,0.014899597242811468,0 +anli_r3,acc,0.35833333333333334,0.013848054140053424,0 +arc_challenge,acc,0.18344709897610922,0.01131017017955454,0 +arc_challenge,acc_norm,0.20733788395904437,0.011846905782971382,0 +arc_easy,acc,0.3653198653198653,0.009880576614806928,0 +arc_easy,acc_norm,0.34974747474747475,0.009785578618940732,0 +boolq,acc,0.5853211009174312,0.008616791778981309,1 +cb,acc,0.4642857142857143,0.06724777654937658,1 +cb,f1,0.30574033013057406,,1 +copa,acc,0.55,0.04999999999999998,0 +hellaswag,acc,0.27086237801234814,0.0044349692574466165,0 +hellaswag,acc_norm,0.27693686516630156,0.004465704810893542,0 +piqa,acc,0.6001088139281828,0.011429608127857663,0 +piqa,acc_norm,0.5908596300326442,0.011471593460443312,0 +rte,acc,0.5270758122743683,0.030052303463143706,0 +sciq,acc,0.605,0.015466551464829344,0 +sciq,acc_norm,0.574,0.015645087688113814,0 +storycloze_2016,acc,0.5440940673436665,0.011517383123961531,0 +winogrande,acc,0.5019731649565904,0.014052376259225632,0 diff --git a/83m20b20b/evaluation/rankeval/83m20b20b_5.json b/83m20b20b/evaluation/rankeval/83m20b20b_5.json new file mode 100644 index 0000000000000000000000000000000000000000..a21b83e5cbb79dff2d42cdd139cdb7fe9fac0898 --- /dev/null +++ b/83m20b20b/evaluation/rankeval/83m20b20b_5.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.346, + "acc_stderr": 0.015050266127564441 + }, + "anli_r2": { + "acc": 0.332, + "acc_stderr": 0.014899597242811468 + }, + "anli_r3": { + "acc": 0.35833333333333334, + "acc_stderr": 0.013848054140053424 + }, + "cb": { + "acc": 0.4642857142857143, + "acc_stderr": 0.06724777654937658, + "f1": 0.30574033013057406 + }, + "copa": { + "acc": 0.55, + "acc_stderr": 0.04999999999999998 + }, + "hellaswag": { + "acc": 0.27086237801234814, + "acc_stderr": 0.0044349692574466165, + "acc_norm": 0.27693686516630156, + "acc_norm_stderr": 0.004465704810893542 + }, + "rte": { + "acc": 0.5270758122743683, + "acc_stderr": 0.030052303463143706 + }, + "winogrande": { + "acc": 0.5019731649565904, + "acc_stderr": 0.014052376259225632 + }, + "storycloze_2016": { + "acc": 0.5440940673436665, + "acc_stderr": 0.011517383123961531 + }, + "boolq": { + "acc": 0.5853211009174312, + "acc_stderr": 0.008616791778981309 + }, + "arc_easy": { + "acc": 0.3653198653198653, + "acc_stderr": 0.009880576614806928, + "acc_norm": 0.34974747474747475, + "acc_norm_stderr": 0.009785578618940732 + }, + "arc_challenge": { + "acc": 0.18344709897610922, + "acc_stderr": 0.01131017017955454, + "acc_norm": 0.20733788395904437, + "acc_norm_stderr": 0.011846905782971382 + }, + "sciq": { + "acc": 0.605, + "acc_stderr": 0.015466551464829344, + "acc_norm": 0.574, + "acc_norm_stderr": 0.015645087688113814 + }, + "piqa": { + "acc": 0.6001088139281828, + "acc_stderr": 0.011429608127857663, + "acc_norm": 0.5908596300326442, + "acc_norm_stderr": 0.011471593460443312 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..575d590cceb8cefa6a234c92a3200cfdda58764d --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73f6bd9b09de08520e818a1d7134adb0749979700aa88691f6615b3e4482c4f5 +size 15518743 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..79b931f18318b4087cb01ebeef9a3c22640cedb2 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16928b58e30587aedefc1d8471b06808a563b84aae1a8cac89c380540b3e5aee +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..df8d2e28405b43cefee963f839dc651704892daa --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f74dfeb5ad9ae9f82f4db3639fae8ad9f1134921cc511edb9ed17f4860bd949e +size 15518626 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecd85d1117ac325db6d38a965d1728de572d683e --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13d1e0b8227d63a9958b4f66db98f599fdb34a7151e9fae215eefda3dd7a3a4a +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..03d2a319f7e61d27114750457598dfe6332ffde7 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f198df64ce314b1c9aa3f1e82c7375d0d212ce318ec9c0766264045a79f54d7 +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..15daff3ce7595d9e41db632f62d05a2004df3528 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af6bb2ef1145a4d69c19c3503fa11f37d8a928d00ecbd58ead7e447c9e4c92d5 +size 15518626 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..039f890804ee0db374ec7e714bc543969c5e2021 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:353325f4160a8cc3d9610cb19e6dce5e9299236bcfd710ec3a0c3bfe35298083 +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa2987e2307d0efdf43b05f661997fc3fb3a46d4 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28e01141ffd9ff202f781af062aa63338f78038bb9dd8a8b6fe796053ca40398 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac309f38d1c5701ffc4960b6953a024b75b3a8a1 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f16f5a9751a7b3c76ac85ec58a129d15adbe91a43d6816b5718c7e98638d71b1 +size 15518626 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..621e205e9020748745b29f271213fa59118e4340 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:447f8a33d6ad6c8eccfe0c09c818f446281ef2a5d3aca5af6e30e6813f6960fa +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ec38f3ce58ff56f19ee8f6581b5852ce3420a4e --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7af5e68bf8b82e728993317a6ef787bb69d4ec290cdf2f3d96786bfdcce36d +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c407e168b80f630db0d553975fbc019d9c42b9d0 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:459a85a641174381d57fd02afba84938352014a047a430d97723c8be32645f2c +size 15518615 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..645eac5190637405a26f4dee4edb416dc9b36c24 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25640c1eb1bc770da8b5ff0ca572b521b5b12fd09248df78ba806b9a56bb1805 +size 15518626 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7271f71de5844243b935fcb761c5cbadd7264d6 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07a9dbc66abf4c259bb2009140776cc63f11d4b571b4616a42215047f1af0b71 +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4e078d49b4062a3618cc3bbf5941d7cd8e27ec7 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420363acf823af95f034954000f7bd6f32dc11a7c438247e5ac1fe1f34bdd51e +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c22508a682af1fce1c4d5727d176a49a0534d181 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5e03b080e91d72fd5b8a6c83de4867d9d745f31741a7012a82208da6552c5c +size 15518626 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3eee76ef1b5b830109ac45b5c389275b577dd7e --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf5bea6dc24ba6e9b2426b4f882a77566bc25a7bb6f1a6c550757c91359b94a +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c918b866f2c87645b64f1710a0a4f5990851501 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:303d9cc7c9a621c9d7611b66dd41803c93b9ec5c06916b8a2056797c19e20d34 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6170b1d28220a4dcc3ca8106ca90cb2332d0e2b1 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c29a48bd7d2f0df0748e055b4ff3693b7cd9c3de16af39ad77f568b1b57a8521 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a892302494e82024bab1bdf9f6fb56ddfe6d519 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:834daf99a056b645e67e45aaf63de3d5255b9261dd16079e2c584cbe8ac3bbb9 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..84a626e1c334636817fe9676d24c196cc4a56348 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfe396234208c776b014c6cea49ed6bf3d1c36654e6110206aa748b4e95a8fdd +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cccf2c8a02a941dbd581d66dfa2260f0ebb79267 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e72e5241598c261de16e7b5164d442f07565f1140718a2a3d6102a6f4eddcd0c +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..de88480aa16c4caf6b78089232ca6ad05de31106 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b5bb4a423f5104b4803586a9ddaaa55a5745284c9cff01b1fc5392b60f6ec43 +size 15518743 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..13bf40b3819339aa66e35e0a42d6a08c105362d7 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5824facc385c0110ea07c2e99640a54dc3ce60a0fb4c3e34e00f8c9be303c7f8 +size 15518626 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd675286cee4be2fc6f04db84051afce100849df --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc844d432a1edfa87026b0235c03d685799c57d49ab5d43745d54a8795b0ed0 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d06c208a51657d54f3a9d870f7a22ab0bcf2e6e2 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e2e6969812b7806930412c68d84cd30f90afaeef6a50d3a485cac67295dbe3f +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fcfbd08aaac0665e5fef8db08487391618520e3 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9ab715646f2627b99c0dccf1389356109b16c294b07081f4fddbc580d571bf7 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6f7cf5f4faf4a20e1aa3b205c52e207b060fc3a --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eabc78b02ec01713390dfe3081ac96ac914e4f7133e125bdf7248a610211fafd +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..335ecc3d20bea01acc13d0a68477b54a49736701 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caff9fabec1cd88725fcbc965aa470f3dee94d590b1172742905e78bfc3dbfe6 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..87a00154e8ae597e775a872f989d24a09ad0784a --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c702f3beddc3e62d191c693a0f310712abe3a7dd691708856b115abd88490232 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebf57679b30e9c8f9a3e3f31e13e761ed796b5ea --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc749b47cdabdc70fbf0cbd048bf803fed71fcdb3b35ec5261d5dcb2184f8f7f +size 15518818 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab916dbf53cbf136373e5f0f446ec778d02aede3 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ab0df586456e1ded450a8655a64e18108064e2196dc54b318254d43141ea9dd +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf4eea3c1729691beb444b9df3b1a1167d5aeb99 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28df1030c03fbf5f01c2884f584605d04785a8c4322a62f94a174a2db87997bd +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b0463ef74ff31f210704ed3f68968c4ef38ed70 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e75808f778252578bf7e98e3e86a37ada92bafdfc58e410c055b09b1f599170e +size 15518743 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a2509c112aef4df912dd70a5c5e3226dd11a50b --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85b3665b3965616b9a44e301888882ffbb0bac1ce351789b71ab9d4dbc6d31af +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..48852f0a585962ad73f13b7845de94d01e417089 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:031ef93b0e3a0a073de69782a5c712a5648c0e81b12da125b8cbe60cc0cb2ff8 +size 15518818 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..417f8b5220c4d416298d931c2b80dbfddc7202f1 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd379820e0881e953655a46daa92929b36792ea431a448400d3660d4cadb26ef +size 15518626 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..75ab255230bcda5fb4021ab204f814c1f9aee303 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d006ca2eae7577f38025556a5020747e09b04d60354b4cc38d6b3209ad83036 +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f79b25b0d406d69b6c9970fc5cd0e3e7f77e4b8c --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:062d7317adfba4185aea6367d2ce6d19f970e08eb8e46d5c457377e8135a88c1 +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fe42ad4b39610908942009525c2eeb6987cfc21 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7737fad02770b52ad0340f8cd8ab36b51d56585eb660a6ad67c3b311dc26ed3 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6922860e184ab7d4d167252cb67842c6a03cd780 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8248916fcb2b5e9e75b458c7bb925e867c8506f5e13e46e4426b8fea0e2404b7 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6af7f318b502b36573291b44800eaad7a466198 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa1e7e8f57dece18d686d5b30d3cd6fe26643c07a53a036622a1a93f7068c73a +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..58d5d2d2c66feb8e792661fd650bc986a225e141 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e00cc0e0899c2437233019cc925ce4f3b1cd6f38b5a08b6bd0193fa379f88cf2 +size 15518562 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a97b380c56a475702de1713f555c1718a789086 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:909b3e59a55d6af5f87b55f4ea3ede53ab391f5563686fd95369e450c5a2b708 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..709bf7e19d482440a60b02c023faeed29c296792 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49f5960d271fd2aa1cbea7d319a17a7d1b58743fc1ac12d65959b0258088e8cf +size 15518615 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..35ee1c918d1544c4f3c30f76be65e1bff1243d7b --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:872f834b6007b127720380ad13970d91f789d94362d49860976ed738a5e2fd9f +size 15518818 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e2d77a8c5605f06c574cb9fff8ef0f69e0bfea4 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18d6152665c0c2b331ef1558a9c836a635f56255746f8c4f9f31a737f41d1d45 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0c5f8010041594ae90d912d7890fc9292cefba4 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0dc9158e0057ccedcafff94a3592aff2de6f7156c1b3b86692688e53742a3dd +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c80e75099eff0ee30f7703734ee13ff02fe8c9c --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69815a69dacf6d7427e32054ca7d0cebfacecb7be67a3dc0d57571dfe9a0f9b2 +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a198f4ae201380d586b449b4e0d6ab90bb69c0e3 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1c576a5a65f84ea76e9febfde86711b7a89eee1a79f7c46d2599133252d1b9d +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8dd631f3e23d382ace6d384d99b8628ba279e0a8 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2a226bf940e72a7a0aaf252572329e63ddfcb59590afc8501bc411de11a7853 +size 15518626 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..85170ede21ece7c34097dcf5fa0578bdaa58fcf0 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b585cede405ed1b946de4d182fcd277cdedad1191434df6f5428ad524cb4707e +size 15518818 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..77e4b228f429c74fb2529cf907f56f99e487cdd7 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f6f24274e98f1a985b684fac29aace896aa8ea6736921ee5a1cbaae44237a86 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e354f59d2e86f19dfb9a0e6255bab58821027983 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:975c3555c25ee5ce025699eebbd1f81b7f7b059711e8dff2d99f8fa86221e4eb +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..21b913f431bbc550b8e9063b75b89756b0981783 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f378109e5d8c744f8c41010c6ff8a265f040b9827c80bc41def5fe6a26329763 +size 15518754 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..95565137f16cee6589c5316a6bd54994edb98436 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eddd6cf466376a9823a9ed9fb1515723982a68dcd6379ee4eb0a218f8a3e893f +size 15518679 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5f3a855757cfc04335af5fc41c05a747213e999 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8c0fce39f84236f09a4ac8717159baa29615a62ac6e66b81686a4486bcdbe79 +size 15518818 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1f6cd436e8f0c506888c2d3826dc246a3504af7 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09bf6b3e8394e0fa10c48bfd6b0ed165e69801d1749174f1e2d222193e0349ab +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8ce1e4debde159294a21258509b62adb7f8d430 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08d4f2530c6f24f1d1a0523ce90ab9230d78f8b2c34ecd0cad0267296e1e6381 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d2f90af1b819fd112c769431d493f3514338ed5 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df4fa1041e1959eb6e5d2c2f4570acb788aa2bb84c95737ada716981f77f9b51 +size 15518690 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ae08c93640adf8d3f0970c25038985d37655ee6 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a558259c156133383f0f812b6e0ae4dc00c4b628e6766ef6d71bf10cc0ee931 +size 15518743 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d22c122ef8245742b5f9901d8b525607a15dc3c6 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6641cd8a4c9e74f9985da9c8807a15770a26be5b52772786ab1895f0b622b45 +size 15518679 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3fd60d1f47d41dd36ebb2229c8fb9a5980161a46 --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee6d7b448714188ce8f2014703609f41f46f606f14cacc49f70b625010bd7040 +size 15518679 diff --git a/83m20b20b/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/83m20b20b/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..89b0a8ecd6b865c1a22d37f0269863679619f11b --- /dev/null +++ b/83m20b20b/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddd4625808ee49be37b5886334cef95456188c45b901f41c442af4d1d56c75ec +size 15518743 diff --git a/83m20b20b/global_step37905/layer_01-model_00-model_states.pt b/83m20b20b/global_step37905/layer_01-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..298b70096cb5b0ea963aabcfa0fb0f0e298738b2 --- /dev/null +++ b/83m20b20b/global_step37905/layer_01-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c06b373c98365ba7cab2a4c5eb4401ce073cc1a1f3e2dda7b9801494359c428 +size 67011843 diff --git a/83m20b20b/global_step37905/layer_03-model_00-model_states.pt b/83m20b20b/global_step37905/layer_03-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..09d7e61738cefee7bb790517f6b0aec6166a7326 --- /dev/null +++ b/83m20b20b/global_step37905/layer_03-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad4bcdf52c7e8252cde9e9d942505ddf51a4b37cc0f474a9a0923f22b9d095cb +size 9851395 diff --git a/83m20b20b/global_step37905/layer_04-model_00-model_states.pt b/83m20b20b/global_step37905/layer_04-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..18d3f142b4550f8d84d0242d7f814061f889f05b --- /dev/null +++ b/83m20b20b/global_step37905/layer_04-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57d4ce91f2f71831e1e6c957da2f116ff67fc4f08d46d2cee686374b5823a104 +size 9851395 diff --git a/83m20b20b/global_step37905/layer_05-model_00-model_states.pt b/83m20b20b/global_step37905/layer_05-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e38f388f5c172894ea134906ccfe6e2a4ae818c5 --- /dev/null +++ b/83m20b20b/global_step37905/layer_05-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f41ff83b0caa31c52fc4f226010968166bc3b88e85c6b71833d01a5390a5b3 +size 9851395 diff --git a/83m20b20b/global_step37905/layer_06-model_00-model_states.pt b/83m20b20b/global_step37905/layer_06-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbca3bfddf338c14cbb86c03f7c1400bcf73882c --- /dev/null +++ b/83m20b20b/global_step37905/layer_06-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:417fa06be63f78a0fb7fb0e8ee609bba4e6bded953532a6c7d1605221361e812 +size 9851395 diff --git a/83m20b20b/global_step37905/layer_07-model_00-model_states.pt b/83m20b20b/global_step37905/layer_07-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d9a8d5788c80ebbf217ec846c9aac8c4a334278 --- /dev/null +++ b/83m20b20b/global_step37905/layer_07-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b33a40dfb7d83a3839ff93edd89eb669a276caf941cf8daf21be5279191e96 +size 9851395 diff --git a/83m20b20b/global_step37905/layer_08-model_00-model_states.pt b/83m20b20b/global_step37905/layer_08-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c94a696a55580df48b22119e68750a797f76993b --- /dev/null +++ b/83m20b20b/global_step37905/layer_08-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3df7bde9d1dcc5f93b685f05f904b4ac8a34de5a36a6656ff03652c95cb84b0 +size 9851395 diff --git a/83m20b20b/global_step37905/layer_09-model_00-model_states.pt b/83m20b20b/global_step37905/layer_09-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0d088ea723f40fa29e6fbb567f22dba365bb5ba --- /dev/null +++ b/83m20b20b/global_step37905/layer_09-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3df67553c218620af0b3cae3f239572154282ffe43664dd336924d8d9d3a5c8 +size 9851395 diff --git a/83m20b20b/global_step37905/layer_10-model_00-model_states.pt b/83m20b20b/global_step37905/layer_10-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..47a3dfe78da9931e9f9f6fc09a534b377480afdb --- /dev/null +++ b/83m20b20b/global_step37905/layer_10-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:379463f7eaf2187b69a5c8835b8ef62b87b5769c204eb962b6313706dd9ef85b +size 9851395 diff --git a/83m20b20b/global_step37905/layer_11-model_00-model_states.pt b/83m20b20b/global_step37905/layer_11-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..99563ba69a65fa5a5e3981fdaae5b4ff34b2bcf4 --- /dev/null +++ b/83m20b20b/global_step37905/layer_11-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e647e91440d7887502b9401d11591bbcca34c78407cd639d18edd895e9248c5 +size 9851395 diff --git a/83m20b20b/global_step37905/layer_12-model_00-model_states.pt b/83m20b20b/global_step37905/layer_12-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a88a10ec8d25a7f1bef530fb0805c154ce19b6d0 --- /dev/null +++ b/83m20b20b/global_step37905/layer_12-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c64022fa245bc4a4a175cdec2dc4318c5af110c9f1582b3b9e992efb3a83672d +size 9851395 diff --git a/83m20b20b/global_step37905/layer_14-model_00-model_states.pt b/83m20b20b/global_step37905/layer_14-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ecbe9b821791b578f221777b95d68aaad7b6811 --- /dev/null +++ b/83m20b20b/global_step37905/layer_14-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90f34ce01f1135734304f6afda35013b817b4dc97abb66ab93b9e10eaefa0ae +size 3779 diff --git a/83m20b20b/global_step37905/mp_rank_00_model_states.pt b/83m20b20b/global_step37905/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c7f23165b348765554ec7731b5260285e2890b4 --- /dev/null +++ b/83m20b20b/global_step37905/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e08882b5c2c256891256a89b4fb5f7f1f62cf6610c20de79d7f6c647755096df +size 31603 diff --git a/83m20b20b/l/events.out.tfevents.1675514118.nid006279.12906.0 b/83m20b20b/l/events.out.tfevents.1675514118.nid006279.12906.0 new file mode 100644 index 0000000000000000000000000000000000000000..4a044636acfa5ca073bb7b7dcc256be60affe805 --- /dev/null +++ b/83m20b20b/l/events.out.tfevents.1675514118.nid006279.12906.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:077a5642f7b049cbc8d57a9f1d9ea852c96f787e6cae5f2dc2da1c634914c101 +size 67726209 diff --git a/83m20b20b/logs/2820863.err b/83m20b20b/logs/2820863.err new file mode 100644 index 0000000000000000000000000000000000000000..cfd534dd883de0204f6604a79a818fa79e2ac266 --- /dev/null +++ b/83m20b20b/logs/2820863.err @@ -0,0 +1,1116 @@ +4: 2023-02-09 22:41:42.753353: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:42.753375: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:42.753444: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:42.753471: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:42.753463: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:42.753472: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:42.753497: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:41:42.753509: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:42.754889: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:42.754888: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:42.754914: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:42.754925: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:42.754936: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:42.754944: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:42.754951: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:42.754959: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:42.755242: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:42.755253: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:42.755251: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:42.755261: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:42.755262: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:42.755279: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:42.755281: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:41:42.755289: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:42.755376: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:42.755386: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:42.755397: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: 2023-02-09 22:41:42.755560: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:42.755558: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:42.755554: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: 2023-02-09 22:41:42.755551: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:41:42.755552: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:41:42.755572: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:42.755586: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:42.755591: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:42.755421: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:42.755581: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:42.755604: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:41:42.755598: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: 2023-02-09 22:41:42.755418: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:42.755413: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:42.755437: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:41:42.755580: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:41:42.755470: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:41:42.755592: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:41:42.755589: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:42.755764: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:42.755783: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:42.755793: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: 2023-02-09 22:41:42.755611: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:41:42.755645: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:42.755838: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:42.755862: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:42.755874: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:42.755878: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:41:42.755883: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:42.756447: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:42.756457: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:42.756463: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:42.756485: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:42.756480: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:42.756487: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:42.756499: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:41:42.756503: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:41:44.462818: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:44.462822: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:44.462827: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:44.462827: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:44.462832: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:44.462840: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:44.462838: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:44.462832: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:41:44.463429: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:44.463430: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:44.463432: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:44.463436: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:44.463437: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:44.463439: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:44.463441: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:41:44.463445: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:44.545841: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:44.545849: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:44.545847: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:44.545855: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:44.545850: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:44.545861: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:44.545857: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:44.545859: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:41:44.546284: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:44.546289: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:44.546293: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:44.546296: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:44.546302: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:44.546302: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:44.546302: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:41:44.546308: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:44.552792: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:44.552792: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:44.552800: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:44.552799: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:44.552799: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:44.552802: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:44.552802: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:44.552806: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:41:44.553163: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:44.553161: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:44.553171: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:44.553170: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:44.553169: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:44.553174: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:44.553179: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:41:44.553182: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:44.555464: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:41:44.555471: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:41:44.555482: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:41:44.555475: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:41:44.555471: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:41:44.555484: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:41:44.555482: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:41:44.555477: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:41:44.555870: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:44.555873: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:44.555877: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:44.555877: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:44.555878: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:44.555881: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:44.555882: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:41:44.555886: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:44.561910: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:44.561908: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:44.561905: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:44.561915: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:44.561916: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:44.561923: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:44.561919: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:44.561926: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:41:44.562309: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:44.562313: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:44.562315: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:44.562319: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:44.562318: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:44.562318: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:44.562321: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:41:44.562324: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:44.562363: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:44.562373: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:44.562368: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:44.562376: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:44.562383: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:44.562381: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:44.562380: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:44.562377: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:41:44.562760: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:44.562761: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:44.562769: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:44.562770: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:44.562772: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:44.562775: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:44.562780: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:41:44.562782: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:41:44.567746: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:44.567747: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:44.567742: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:44.567759: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:44.567754: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:44.567755: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:44.567762: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:44.567756: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:41:44.568150: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:41:44.568152: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:41:44.568155: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:41:44.568158: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:41:44.568160: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:41:44.568158: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:41:44.568162: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:41:44.568165: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:44.605461: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:41:44.605459: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:41:44.605467: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:41:44.605468: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:41:44.605464: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:41:44.605472: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:41:44.605457: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:41:44.605466: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:41:44.605658: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:44.605663: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:44.605664: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:44.605665: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:44.605667: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:44.605669: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:44.605670: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:41:44.605672: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:42:03.062844: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.062860: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.062873: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.062879: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.062883: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.062890: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.062897: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.062901: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.063825: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.063822: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.063830: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.063835: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.063832: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.063834: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.063833: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.063837: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.064230: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.064233: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.064241: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.064239: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.064244: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.064244: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.064240: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.064240: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.064598: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.064600: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.064609: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.064610: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.064614: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:42:03.064824: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.064610: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.064619: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:42:03.064830: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.064616: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:42:03.064835: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.064842: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.064843: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.064844: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.064846: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.064852: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.065078: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.065086: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.065089: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.065086: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.065096: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.065095: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.065100: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.065104: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.068382: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.068387: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.068389: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.068387: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.068390: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.068397: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:42:03.068394: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-09 22:42:03.070358: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070360: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070361: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070362: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070366: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070366: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070369: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070374: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:42:03.070374: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:42:03.070377: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:42:03.070371: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:42:03.070384: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:42:03.070385: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:42:03.070384: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:42:03.070388: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:42:03.070391: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.084485: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.084489: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.084490: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.084492: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.084502: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.084491: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.084494: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.084496: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.084500: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.084497: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:42:03.084509: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.084511: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.084515: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.084515: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.084517: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:42:03.084518: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:42:03.093034: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.093178: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: 2023-02-09 22:42:03.093036: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.093037: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-09 22:42:03.093181: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.093316: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.093041: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-09 22:42:03.093184: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.093038: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-02-09 22:42:03.093319: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-09 22:42:03.093182: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.093045: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-02-09 22:42:03.093321: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-09 22:42:03.093187: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.093049: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.093194: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:42:03.093197: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:42:03.093047: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-02-09 22:42:03.093321: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-09 22:42:03.093191: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.093051: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:42:03.093054: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.093058: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:42:03.093058: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:42:03.093060: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.093322: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-09 22:42:03.093191: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: 2023-02-09 22:42:03.093064: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:42:03.093291: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-02-09 22:42:03.093323: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-09 22:42:03.093191: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.093200: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:42:03.093205: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.093323: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-09 22:42:03.093205: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:42:03.093210: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:42:03.093211: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:42:03.093212: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.093323: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: 2023-02-09 22:42:03.093307: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:42:03.093331: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.093332: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.093337: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.093340: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.093341: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.093342: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.093345: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:42:03.093346: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.068401: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:42:03.068396: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.068403: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:42:03.068399: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:42:03.068407: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:42:03.068406: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:42:03.068410: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:42:03.068414: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:42:03.068415: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.142440: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.142477: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.142492: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.142498: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.142505: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.142624: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.142631: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.142646: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.143719: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.143736: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.143758: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.143769: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.143783: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.143787: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.143795: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.143813: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.144591: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.144592: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.144590: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.144591: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.144595: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.144599: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.144599: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.144609: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.144610: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.144611: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.144605: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:42:03.144613: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.144615: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.144614: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.144618: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:42:03.144624: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.145470: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.145473: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.145472: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.145475: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.145478: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.145481: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.145484: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.145500: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.145501: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.145503: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.145504: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.145505: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.145505: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.145508: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:42:03.145550: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:42:03.145572: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_upper_triang_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_upper_triang_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module fused_mix_prec_layer_norm_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module fused_mix_prec_layer_norm_cuda... +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +2: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: +3: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: +7: +7: +5: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +5: Building extension module utils... +5: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +5: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +6: Loading extension module utils... +4: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +5: +5: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: +0: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +5: +5: Loading extension module utils... +5: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +5: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +5: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +2: +7: Loading extension module utils... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +2: +2: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +4: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +4: +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +4: +4: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +3: +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +4: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +1: +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/utils.py:349: UserWarning: Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings +0: warnings.warn("Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings") diff --git a/83m20b20b/logs/2820863.out b/83m20b20b/logs/2820863.out new file mode 100644 index 0000000000000000000000000000000000000000..a710eb4ef8b183addbf27e66e2a14129aa2b4dfe --- /dev/null +++ b/83m20b20b/logs/2820863.out @@ -0,0 +1,4384 @@ +Model parameters: d_model 640 ffw_size 2560 kv_size 64 n_heads 10 n_layers 10 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 1 --pipeline-model-parallel-size 1 --num-layers 10 --hidden-size 640 --num-attention-heads 10 --kv-channels 64 --ffn-hidden-size 2560 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 4 --global-batch-size 256 --train-samples 1 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --loss-scale 12 --clip-grad 1.0 --kill-switch-path kill-switch-83m20b20bval --bf16 --checkpoint-activations --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 1 --lr-warmup-samples 0 --clip-grad 1.0 --weight-decay 1e-1 --no-load-optim --reset-progress --override-lr-scheduler --log-interval 10 --save-interval 1000 --eval-interval 1 --eval-iters 100 --eval-only true --tensorboard-dir tensorboard_83m20b20bval --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save checkpoints_83m20b20b --load checkpoints_83m20b20b --train-weighted-split-paths-path train20b.txt --valid-weighted-split-paths-path val.txt --data-impl mmap --deepspeed --deepspeed_config ds_configs/2820863.json --zero-stage 0 +START 2820863: Thu 09 Feb 2023 10:40:35 PM EET +0: +0: +0: ======================= ROCm System Management Interface ======================= +0: ================================= Concise Info ================================= +0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +0: 0 43.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 2 41.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 4 45.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 6 37.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: ================================================================================ +0: ============================= End of ROCm SMI Log ============================== +4: +4: +4: ======================= ROCm System Management Interface ======================= +4: ================================= Concise Info ================================= +4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +4: 0 46.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 2 42.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 4 46.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 5 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 6 45.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: ================================================================================ +4: ============================= End of ROCm SMI Log ============================== +6: +6: +6: ======================= ROCm System Management Interface ======================= +6: ================================= Concise Info ================================= +6: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +6: 0 45.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 2 43.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 4 46.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 6 39.0c 81.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: ================================================================================ +6: ============================= End of ROCm SMI Log ============================== +3: +3: +3: ======================= ROCm System Management Interface ======================= +3: ================================= Concise Info ================================= +3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +3: 0 46.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 2 40.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 4 45.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 5 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 6 47.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: ================================================================================ +3: ============================= End of ROCm SMI Log ============================== +1: +1: +1: ======================= ROCm System Management Interface ======================= +1: ================================= Concise Info ================================= +1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +1: 0 43.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 2 45.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 4 43.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 6 40.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: ================================================================================ +1: ============================= End of ROCm SMI Log ============================== +5: +5: +5: ======================= ROCm System Management Interface ======================= +5: ================================= Concise Info ================================= +5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +5: 0 44.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 1 51.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 2 42.0c 81.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 4 42.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 6 39.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: ================================================================================ +5: ============================= End of ROCm SMI Log ============================== +2: +2: +2: ======================= ROCm System Management Interface ======================= +2: ================================= Concise Info ================================= +2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +2: 0 44.0c 98.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 1 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 2 44.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 4 42.0c 97.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 6 45.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: ================================================================================ +2: ============================= End of ROCm SMI Log ============================== +7: +7: +7: ======================= ROCm System Management Interface ======================= +7: ================================= Concise Info ================================= +7: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +7: 0 43.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 2 41.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 3 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 4 38.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 5 53.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 6 36.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: ================================================================================ +7: ============================= End of ROCm SMI Log ============================== +6: Launching on nid006024 (6/8), master nid006018 port 9999, GPUs 8, CUDA: True +1: Launching on nid006019 (1/8), master nid006018 port 9999, GPUs 8, CUDA: True +0: Launching on nid006018 (0/8), master nid006018 port 9999, GPUs 8, CUDA: True +5: Launching on nid006023 (5/8), master nid006018 port 9999, GPUs 8, CUDA: True +7: Launching on nid006025 (7/8), master nid006018 port 9999, GPUs 8, CUDA: True +3: Launching on nid006021 (3/8), master nid006018 port 9999, GPUs 8, CUDA: True +4: Launching on nid006022 (4/8), master nid006018 port 9999, GPUs 8, CUDA: True +2: Launching on nid006020 (2/8), master nid006018 port 9999, GPUs 8, CUDA: True +0: using world size: 64, data-parallel-size: 64, tensor-model-parallel size: 1, pipeline-model-parallel size: 1 +0: accumulate and all-reduce gradients in fp32 for bfloat16 data type. +0: using torch.bfloat16 for parameters ... +0: ------------------------ arguments ------------------------ +0: abort_on_unmet_fused_kernel_constraints ......... False +0: accumulate_allreduce_grads_in_fp32 .............. True +0: adam_beta1 ...................................... 0.9 +0: adam_beta2 ...................................... 0.999 +0: adam_eps ........................................ 1e-08 +0: adlr_autoresume ................................. False +0: adlr_autoresume_interval ........................ 1000 +0: apply_query_key_layer_scaling ................... True +0: apply_residual_connection_post_layernorm ........ False +0: attention_dropout ............................... 0.1 +0: attention_softmax_in_fp32 ....................... False +0: bert_binary_head ................................ True +0: bert_load ....................................... None +0: bf16 ............................................ True +0: bias_dropout_fusion ............................. True +0: bias_gelu_fusion ................................ True +0: biencoder_projection_dim ........................ 0 +0: biencoder_shared_query_context_model ............ False +0: block_data_path ................................. None +0: checkpoint_activations .......................... True +0: checkpoint_in_cpu ............................... False +0: checkpoint_num_layers ........................... 1 +0: clip_grad ....................................... 1.0 +0: codecarbon_dir .................................. None +0: consumed_train_samples .......................... 0 +0: consumed_train_tokens ........................... 0 +0: consumed_valid_samples .......................... 0 +0: contigious_checkpointing ........................ False +0: cpu_optimizer ................................... False +0: cpu_torch_adam .................................. False +0: curriculum_learning ............................. False +0: data_impl ....................................... mmap +0: data_parallel_size .............................. 64 +0: data_path ....................................... None +0: dataloader_type ................................. single +0: DDP_impl ........................................ local +0: decoder_seq_length .............................. None +0: deepscale ....................................... False +0: deepscale_config ................................ None +0: deepspeed ....................................... True +0: deepspeed_activation_checkpointing .............. False +0: deepspeed_config ................................ ds_configs/2820863.json +0: deepspeed_mpi ................................... False +0: distribute_checkpointed_activations ............. False +0: distributed_backend ............................. nccl +0: embed_layernorm ................................. False +0: embedding_path .................................. None +0: encoder_seq_length .............................. 2048 +0: eod_mask_loss ................................... False +0: eval_interval ................................... 1 +0: eval_iters ...................................... 100 +0: eval_only ....................................... True +0: evidence_data_path .............................. None +0: exit_duration_in_mins ........................... None +0: exit_interval ................................... None +0: ffn_hidden_size ................................. 2560 +0: finetune ........................................ False +0: fp16 ............................................ False +0: fp16_lm_cross_entropy ........................... False +0: fp32_residual_connection ........................ False +0: gigaflos_no_embeds .............................. 0 +0: global_batch_size ............................... 256 +0: glu_activation .................................. None +0: hidden_dropout .................................. 0.1 +0: hidden_size ..................................... 640 +0: hysteresis ...................................... 2 +0: ict_head_size ................................... None +0: ict_load ........................................ None +0: img_dim ......................................... 224 +0: indexer_batch_size .............................. 128 +0: indexer_log_interval ............................ 1000 +0: inference ....................................... False +0: init_method_std ................................. 0.02 +0: init_method_xavier_uniform ...................... False +0: initial_loss_scale .............................. 4294967296 +0: kill_switch_path ................................ kill-switch-83m20b20bval +0: kv_channels ..................................... 64 +0: layer_norm_fusion ............................... True +0: layernorm_epsilon ............................... 1e-05 +0: lazy_mpu_init ................................... None +0: load ............................................ checkpoints_83m20b20b +0: local_rank ...................................... None +0: log_batch_size_to_tensorboard ................... True +0: log_interval .................................... 10 +0: log_learning_rate_to_tensorboard ................ True +0: log_level ....................................... None +0: log_level_replica ............................... None +0: log_loss_scale_to_tensorboard ................... True +0: log_num_zeros_in_grad ........................... False +0: log_params_norm ................................. False +0: log_path ........................................ None +0: log_timers_to_tensorboard ....................... True +0: log_validation_ppl_to_tensorboard ............... True +0: loss_on_targets_only ............................ False +0: loss_scale ...................................... 12.0 +0: loss_scale_window ............................... 1000 +0: lr .............................................. 0.0002 +0: lr_decay_iters .................................. None +0: lr_decay_samples ................................ 1 +0: lr_decay_style .................................. cosine +0: lr_decay_tokens ................................. None +0: lr_warmup_fraction .............................. None +0: lr_warmup_iters ................................. 0 +0: lr_warmup_samples ............................... 0 +0: make_vocab_size_divisible_by .................... 128 +0: mask_prob ....................................... 0.15 +0: masked_softmax_fusion ........................... True +0: max_position_embeddings ......................... 2048 +0: mean_noise_span_length .......................... None +0: memory_centric_tiled_linear ..................... False +0: merge_file ...................................... gpt2/merges.txt +0: micro_batch_size ................................ 4 +0: min_loss_scale .................................. 1.0 +0: min_lr .......................................... 2e-05 +0: mmap_warmup ..................................... False +0: no_load_optim ................................... True +0: no_load_rng ..................................... None +0: no_save_optim ................................... None +0: no_save_rng ..................................... None +0: noise_density ................................... None +0: num_attention_heads ............................. 10 +0: num_channels .................................... 3 +0: num_classes ..................................... 1000 +0: num_layers ...................................... 10 +0: num_layers_per_virtual_pipeline_stage ........... None +0: num_workers ..................................... 2 +0: onnx_safe ....................................... None +0: openai_gelu ..................................... False +0: optimizer ....................................... adam +0: optimizer_fusion ................................ True +0: override_lr_scheduler ........................... True +0: pad_vocab_size_to ............................... None +0: params_dtype .................................... torch.bfloat16 +0: partition_activations ........................... False +0: patch_dim ....................................... 16 +0: pipeline_model_parallel_size .................... 1 +0: position_embedding_type ......................... PositionEmbeddingType.absolute +0: pp_partition_method ............................. None +0: profile_backward ................................ False +0: query_in_block_prob ............................. 0.1 +0: rampup_batch_size ............................... None +0: rank ............................................ 0 +0: remote_device ................................... none +0: reset_attention_mask ............................ False +0: reset_position_ids .............................. False +0: reset_progress .................................. True +0: retriever_report_topk_accuracies ................ [] +0: retriever_score_scaling ......................... False +0: retriever_seq_length ............................ 256 +0: reweight_loss_based_on_position_frequency ....... False +0: sample_rate ..................................... 1.0 +0: save ............................................ checkpoints_83m20b20b +0: save_interval ................................... 1000 +0: scatter_gather_tensors_in_pipeline .............. True +0: scattered_embeddings ............................ False +0: seed ............................................ 1234 +0: seq_length ...................................... 2048 +0: sgd_momentum .................................... 0.9 +0: short_seq_prob .................................. 0.1 +0: skip_train_iteration_range ...................... None +0: split ........................................... None +0: split_transformers .............................. False +0: sync_tp_duplicated_parameters ................... False +0: synchronize_each_layer .......................... False +0: tensor_model_parallel_size ...................... 1 +0: tensorboard_dir ................................. tensorboard_83m20b20bval +0: tensorboard_log_interval ........................ 1 +0: tensorboard_queue_size .......................... 5 +0: test_weighted_split_paths ....................... None +0: test_weighted_split_paths_path .................. None +0: tile_factor ..................................... 1 +0: titles_data_path ................................ None +0: tokenizer_name_or_path .......................... None +0: tokenizer_type .................................. GPT2BPETokenizer +0: train_iters ..................................... None +0: train_samples ................................... 1 +0: train_tokens .................................... None +0: train_weighted_split_names ...................... ['train'] +0: train_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document']] +0: train_weighted_split_paths_path ................. None +0: train_weighted_split_splits ..................... [['0:1']] +0: train_weighted_split_weights .................... [['1.0']] +0: universal_checkpoint ............................ False +0: use_bnb_optimizer ............................... False +0: use_checkpoint_lr_scheduler ..................... False +0: use_contiguous_buffers_in_ddp ................... True +0: use_cpu_initialization .......................... None +0: use_one_sent_docs ............................... False +0: use_pin_memory .................................. False +0: valid_num_workers ............................... 2 +0: valid_weighted_split_names ...................... ['validation'] +0: valid_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document']] +0: valid_weighted_split_paths_path ................. None +0: valid_weighted_split_splits ..................... [['0:1']] +0: valid_weighted_split_weights .................... [['1.0']] +0: virtual_pipeline_model_parallel_size ............ None +0: vocab_extra_ids ................................. 0 +0: vocab_file ...................................... gpt2/vocab.json +0: weight_decay .................................... 0.1 +0: world_size ...................................... 64 +0: zero_allgather_bucket_size ...................... 0.0 +0: zero_contigious_gradients ....................... False +0: zero_reduce_bucket_size ......................... 0.0 +0: zero_reduce_scatter ............................. False +0: zero_stage ...................................... 0 +0: -------------------- end of arguments --------------------- +0: setting number of micro-batches to constant 1 +0: > building GPT2BPETokenizer tokenizer ... +0: > padded vocab (size: 50257) with 47 dummy tokens (new size: 50304) +0: DeepSpeed general environment info: +0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] +0: torch version .................... 1.13.0+rocm5.2 +0: torch cuda version ............... None +0: torch hip version ................ 5.2.21151-afdc89f8 +0: nvcc version ..................... None +0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] +0: deepspeed info ................... 0.7.5, unknown, unknown +0: deepspeed wheel compiled w. ...... torch 1.13, hip 5.1 +0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** +0: > initializing torch distributed ... +0: [2023-02-09 22:43:05,353] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +7: > setting tensorboard ... +0: > initializing tensor model parallel with size 1 +0: > initializing pipeline model parallel with size 1 +0: > setting random seeds to 1234 ... +0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 +0: > compiling dataset index builder ... +0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: make: Nothing to be done for 'default'. +0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: >>> done with dataset index builder. Compilation time: 0.160 seconds +0: > compiling and loading fused kernels ... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 87 +0: ninja: no work to do. +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 63 +0: ninja: no work to do. +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 67 +0: ninja: no work to do. +0: >>> done with compiling and loading fused kernels. Compilation time: 18.218 seconds +0: time to initialize megatron (seconds): -17.487 +0: [after megatron is initialized] datetime: 2023-02-09 22:43:26 +0: building GPT model ... +0: [2023-02-09 22:43:26,650] [INFO] [utils.py:827:see_memory_usage] Before Building Model +0: [2023-02-09 22:43:26,651] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB +0: [2023-02-09 22:43:26,651] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.67 GB, percent = 6.1% +0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None +0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=1, model=0): 1, ProcessCoord(pipe=0, data=2, model=0): 2, ProcessCoord(pipe=0, data=3, model=0): 3, ProcessCoord(pipe=0, data=4, model=0): 4, ProcessCoord(pipe=0, data=5, model=0): 5, ProcessCoord(pipe=0, data=6, model=0): 6, ProcessCoord(pipe=0, data=7, model=0): 7, ProcessCoord(pipe=0, data=8, model=0): 8, ProcessCoord(pipe=0, data=9, model=0): 9, ProcessCoord(pipe=0, data=10, model=0): 10, ProcessCoord(pipe=0, data=11, model=0): 11, ProcessCoord(pipe=0, data=12, model=0): 12, ProcessCoord(pipe=0, data=13, model=0): 13, ProcessCoord(pipe=0, data=14, model=0): 14, ProcessCoord(pipe=0, data=15, model=0): 15, ProcessCoord(pipe=0, data=16, model=0): 16, ProcessCoord(pipe=0, data=17, model=0): 17, ProcessCoord(pipe=0, data=18, model=0): 18, ProcessCoord(pipe=0, data=19, model=0): 19, ProcessCoord(pipe=0, data=20, model=0): 20, ProcessCoord(pipe=0, data=21, model=0): 21, ProcessCoord(pipe=0, data=22, model=0): 22, ProcessCoord(pi +0: pe=0, data=23, model=0): 23, ProcessCoord(pipe=0, data=24, model=0): 24, ProcessCoord(pipe=0, data=25, model=0): 25, ProcessCoord(pipe=0, data=26, model=0): 26, ProcessCoord(pipe=0, data=27, model=0): 27, ProcessCoord(pipe=0, data=28, model=0): 28, ProcessCoord(pipe=0, data=29, model=0): 29, ProcessCoord(pipe=0, data=30, model=0): 30, ProcessCoord(pipe=0, data=31, model=0): 31, ProcessCoord(pipe=0, data=32, model=0): 32, ProcessCoord(pipe=0, data=33, model=0): 33, ProcessCoord(pipe=0, data=34, model=0): 34, ProcessCoord(pipe=0, data=35, model=0): 35, ProcessCoord(pipe=0, data=36, model=0): 36, ProcessCoord(pipe=0, data=37, model=0): 37, ProcessCoord(pipe=0, data=38, model=0): 38, ProcessCoord(pipe=0, data=39, model=0): 39, ProcessCoord(pipe=0, data=40, model=0): 40, ProcessCoord(pipe=0, data=41, model=0): 41, ProcessCoord(pipe=0, data=42, model=0): 42, ProcessCoord(pipe=0, data=43, model=0): 43, ProcessCoord(pipe=0, data=44, model=0): 44, ProcessCoord(pipe=0, data=45, model=0): 45, ProcessCoord(pipe=0, data=4 +0: 6, model=0): 46, ProcessCoord(pipe=0, data=47, model=0): 47, ProcessCoord(pipe=0, data=48, model=0): 48, ProcessCoord(pipe=0, data=49, model=0): 49, ProcessCoord(pipe=0, data=50, model=0): 50, ProcessCoord(pipe=0, data=51, model=0): 51, ProcessCoord(pipe=0, data=52, model=0): 52, ProcessCoord(pipe=0, data=53, model=0): 53, ProcessCoord(pipe=0, data=54, model=0): 54, ProcessCoord(pipe=0, data=55, model=0): 55, ProcessCoord(pipe=0, data=56, model=0): 56, ProcessCoord(pipe=0, data=57, model=0): 57, ProcessCoord(pipe=0, data=58, model=0): 58, ProcessCoord(pipe=0, data=59, model=0): 59, ProcessCoord(pipe=0, data=60, model=0): 60, ProcessCoord(pipe=0, data=61, model=0): 61, ProcessCoord(pipe=0, data=62, model=0): 62, ProcessCoord(pipe=0, data=63, model=0): 63} +0: [2023-02-09 22:43:28,670] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer +0: stage=0 layers=17 +0: 0: _to_float16 +0: 1: EmbeddingPipe +0: 2: +0: 3: ParallelTransformerLayerPipe +0: 4: ParallelTransformerLayerPipe +0: 5: ParallelTransformerLayerPipe +0: 6: ParallelTransformerLayerPipe +0: 7: ParallelTransformerLayerPipe +0: 8: ParallelTransformerLayerPipe +0: 9: ParallelTransformerLayerPipe +0: 10: ParallelTransformerLayerPipe +0: 11: ParallelTransformerLayerPipe +0: 12: ParallelTransformerLayerPipe +0: 13: undo +0: 14: MixedFusedLayerNorm +0: 15: EmbeddingPipe +0: 16: float16_to_fp32 +0: loss: CrossEntropy +0: [2023-02-09 22:43:28,951] [INFO] [utils.py:827:see_memory_usage] After Building Model +0: [2023-02-09 22:43:28,952] [INFO] [utils.py:828:see_memory_usage] MA 0.16 GB Max_MA 0.16 GB CA 0.17 GB Max_CA 0 GB +0: [2023-02-09 22:43:28,952] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.69 GB, percent = 6.1% +0: setting training iterations to 0 +0: > learning rate decay style: cosine +0: DeepSpeed is enabled. +0: [2023-02-09 22:43:28,953] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown +0: [2023-02-09 22:43:40,882] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +0: [2023-02-09 22:43:40,882] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer +0: [2023-02-09 22:43:40,882] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer +0: [2023-02-09 22:43:40,885] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +0: [2023-02-09 22:43:40,885] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer +0: [2023-02-09 22:43:41,006] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer +0: [2023-02-09 22:43:41,006] [INFO] [utils.py:828:see_memory_usage] MA 0.16 GB Max_MA 0.16 GB CA 0.17 GB Max_CA 0 GB +0: [2023-02-09 22:43:41,007] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.37 GB, percent = 6.2% +5: ninja: no work to do. +5: Time to load utils op: 0.2788212299346924 seconds +0: Time to load utils op: 0.21073555946350098 seconds +0: Time to load utils op: 0.20302367210388184 secondsTime to load utils op: 0.20323967933654785 seconds +0: +0: Time to load utils op: 0.20363140106201172 seconds +0: Time to load utils op: 0.2031230926513672 seconds +0: Time to load utils op: 0.20381450653076172 seconds +0: Time to load utils op: 0.20409870147705078 seconds +0: Time to load utils op: 0.20380544662475586 seconds +5: Time to load utils op: 0.20273780822753906 seconds +5: Time to load utils op: 0.2033226490020752 seconds +5: Time to load utils op: 0.20342421531677246 seconds +5: Time to load utils op: 0.20376038551330566 seconds +5: Time to load utils op: 0.20382428169250488 seconds +5: Time to load utils op: 0.20385241508483887 seconds +5: Time to load utils op: 0.20360517501831055 seconds +1: Time to load utils op: 0.2121117115020752 seconds +1: Time to load utils op: 0.21212458610534668 seconds +1: Time to load utils op: 0.21208763122558594 seconds +1: Time to load utils op: 0.21213912963867188 secondsTime to load utils op: 0.21210408210754395 seconds +1: Time to load utils op: 0.2121272087097168 seconds +1: +1: Time to load utils op: 0.21212148666381836 seconds +1: Time to load utils op: 0.21216034889221191 seconds +2: Time to load utils op: 0.21176528930664062 seconds +2: Time to load utils op: 0.21177053451538086 seconds +2: Time to load utils op: 0.21179628372192383 seconds +2: Time to load utils op: 0.21179962158203125 seconds +2: Time to load utils op: 0.2118227481842041 secondsTime to load utils op: 0.21177935600280762 seconds +2: +2: Time to load utils op: 0.21182894706726074 seconds +2: Time to load utils op: 0.2118391990661621 seconds +3: Time to load utils op: 0.21171164512634277 secondsTime to load utils op: 0.21169519424438477 seconds +3: +3: Time to load utils op: 0.21174049377441406 secondsTime to load utils op: 0.2117462158203125 seconds +3: +3: Time to load utils op: 0.21174311637878418 seconds +3: Time to load utils op: 0.211747407913208 secondsTime to load utils op: 0.21175408363342285 seconds +3: +3: Time to load utils op: 0.21175718307495117 seconds +4: Time to load utils op: 0.2106490135192871 secondsTime to load utils op: 0.21064972877502441 seconds +4: +4: Time to load utils op: 0.2106621265411377 seconds +4: Time to load utils op: 0.21067047119140625 seconds +4: Time to load utils op: 0.21068644523620605 seconds +4: Time to load utils op: 0.21071362495422363 secondsTime to load utils op: 0.2106485366821289 seconds +4: +4: Time to load utils op: 0.21068549156188965 seconds +6: Time to load utils op: 0.21081852912902832 secondsTime to load utils op: 0.21079397201538086 seconds +6: +6: Time to load utils op: 0.21084237098693848 secondsTime to load utils op: 0.21082448959350586 seconds +6: +6: Time to load utils op: 0.2108442783355713 seconds +6: Time to load utils op: 0.21088576316833496 seconds +6: Time to load utils op: 0.21086454391479492 secondsTime to load utils op: 0.21086835861206055 seconds +6: +7: Time to load utils op: 0.21028900146484375 secondsTime to load utils op: 0.2103128433227539 seconds +7: +7: Time to load utils op: 0.21031737327575684 seconds +7: Time to load utils op: 0.21032333374023438 seconds +7: Time to load utils op: 0.2103748321533203 secondsTime to load utils op: 0.21033930778503418 seconds +7: Time to load utils op: 0.21033787727355957 seconds +7: +7: Time to load utils op: 0.21034932136535645 seconds +0: [2023-02-09 22:43:41,331] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 +0: [2023-02-09 22:43:41,332] [INFO] [utils.py:828:see_memory_usage] MA 0.16 GB Max_MA 0.16 GB CA 0.17 GB Max_CA 0 GB +0: [2023-02-09 22:43:41,332] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.38 GB, percent = 6.2% +5: Time to load utils op: 0.0004715919494628906 secondsTime to load utils op: 0.0003848075866699219 seconds +5: +5: Time to load utils op: 0.00037550926208496094 seconds +5: Time to load utils op: 0.0004470348358154297 seconds +0: Time to load utils op: 0.0005021095275878906 seconds +5: Time to load utils op: 0.0005233287811279297 seconds +5: Time to load utils op: 0.0005662441253662109 seconds +5: Time to load utils op: 0.0005753040313720703 seconds +0: Time to load utils op: 0.0006189346313476562 seconds +5: Time to load utils op: 0.0005066394805908203 seconds +0: Time to load utils op: 0.0005486011505126953 seconds +0: Time to load utils op: 0.0004062652587890625 seconds +0: Time to load utils op: 0.0004031658172607422 seconds +0: Time to load utils op: 0.0006475448608398438 seconds +0: Time to load utils op: 0.0006635189056396484 seconds +3: Time to load utils op: 0.0010609626770019531 seconds +6: Time to load utils op: 0.0010859966278076172 seconds +2: Time to load utils op: 0.0008759498596191406 seconds +4: Time to load utils op: 0.0009922981262207031 seconds +2: Time to load utils op: 0.0008630752563476562 secondsTime to load utils op: 0.0008516311645507812 seconds +2: +4: Time to load utils op: 0.001065969467163086 seconds +7: Time to load utils op: 0.0011894702911376953 seconds +7: Time to load utils op: 0.0012087821960449219 seconds +6: Time to load utils op: 0.00119781494140625 seconds +2: Time to load utils op: 0.0010669231414794922 secondsTime to load utils op: 0.0010499954223632812 seconds +2: +2: Time to load utils op: 0.001054525375366211 seconds +2: Time to load utils op: 0.0010304450988769531 seconds +3: Time to load utils op: 0.0014181137084960938 seconds +2: Time to load utils op: 0.001092672348022461 seconds +6: Time to load utils op: 0.0014178752899169922 seconds +6: Time to load utils op: 0.0014157295227050781 seconds +4: Time to load utils op: 0.0012726783752441406 seconds +4: Time to load utils op: 0.001308441162109375 seconds +4: Time to load utils op: 0.0013117790222167969 seconds +6: Time to load utils op: 0.0013909339904785156 seconds +3: Time to load utils op: 0.0015294551849365234 seconds +3: Time to load utils op: 0.0015211105346679688 secondsTime to load utils op: 0.0015451908111572266 seconds +3: +3: Time to load utils op: 0.0015189647674560547 seconds +3: Time to load utils op: 0.0015323162078857422 seconds +3: Time to load utils op: 0.0015659332275390625 seconds +7: Time to load utils op: 0.001489877700805664 seconds +4: Time to load utils op: 0.0012962818145751953 seconds +6: Time to load utils op: 0.0013968944549560547 seconds +7: Time to load utils op: 0.0014884471893310547 seconds +6: Time to load utils op: 0.0013833045959472656 seconds +4: Time to load utils op: 0.0013127326965332031 seconds +6: Time to load utils op: 0.0014104843139648438 seconds +7: Time to load utils op: 0.0014657974243164062 seconds +4: Time to load utils op: 0.001344442367553711 seconds +7: Time to load utils op: 0.0015370845794677734 secondsTime to load utils op: 0.0015060901641845703 seconds +7: +7: Time to load utils op: 0.0014503002166748047 seconds +1: Time to load utils op: 0.0010859966278076172 seconds +1: Time to load utils op: 0.0010814666748046875 seconds +1: Time to load utils op: 0.0013527870178222656 seconds +1: Time to load utils op: 0.001341104507446289 seconds +1: Time to load utils op: 0.0013608932495117188 seconds +1: Time to load utils op: 0.001399993896484375 seconds +1: Time to load utils op: 0.0013871192932128906 seconds +1: Time to load utils op: 0.0014171600341796875 seconds +0: [2023-02-09 22:43:41,462] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 +0: [2023-02-09 22:43:41,462] [INFO] [utils.py:828:see_memory_usage] MA 0.37 GB Max_MA 0.37 GB CA 0.48 GB Max_CA 0 GB +0: [2023-02-09 22:43:41,463] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.52 GB, percent = 6.3% +0: [2023-02-09 22:43:41,571] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 +0: [2023-02-09 22:43:41,572] [INFO] [utils.py:828:see_memory_usage] MA 0.37 GB Max_MA 0.37 GB CA 0.48 GB Max_CA 0 GB +0: [2023-02-09 22:43:41,572] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.52 GB, percent = 6.3% +0: [2023-02-09 22:43:41,680] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 +0: [2023-02-09 22:43:41,680] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-09 22:43:41,681] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.52 GB, percent = 6.3% +0: [2023-02-09 22:43:41,787] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 +0: [2023-02-09 22:43:41,787] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-09 22:43:41,787] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.52 GB, percent = 6.3% +0: [2023-02-09 22:43:41,894] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 +0: [2023-02-09 22:43:41,895] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-09 22:43:41,895] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.52 GB, percent = 6.3% +0: [2023-02-09 22:43:41,999] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer +0: [2023-02-09 22:43:42,000] [INFO] [utils.py:828:see_memory_usage] MA 0.47 GB Max_MA 0.47 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-09 22:43:42,000] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.52 GB, percent = 6.3% +0: [2023-02-09 22:43:42,129] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer +0: [2023-02-09 22:43:42,129] [INFO] [utils.py:828:see_memory_usage] MA 0.48 GB Max_MA 0.48 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-09 22:43:42,129] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.52 GB, percent = 6.3% +0: [2023-02-09 22:43:42,234] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer +0: [2023-02-09 22:43:42,235] [INFO] [utils.py:828:see_memory_usage] MA 0.48 GB Max_MA 0.48 GB CA 0.58 GB Max_CA 1 GB +0: [2023-02-09 22:43:42,235] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.52 GB, percent = 6.3% +0: [2023-02-09 22:43:42,235] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +0: [2023-02-09 22:43:42,235] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler +0: [2023-02-09 22:43:42,235] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = +0: [2023-02-09 22:43:42,235] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0002, 0.0002, 0.0002], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +0: [2023-02-09 22:43:42,236] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: +0: [2023-02-09 22:43:42,236] [INFO] [config.py:1011:print] activation_checkpointing_config { +0: "partition_activations": false, +0: "contiguous_memory_optimization": false, +0: "cpu_checkpointing": false, +0: "number_checkpoints": null, +0: "synchronize_checkpoint_boundary": false, +0: "profile": false +0: } +0: [2023-02-09 22:43:42,236] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +0: [2023-02-09 22:43:42,236] [INFO] [config.py:1011:print] amp_enabled .................. False +0: [2023-02-09 22:43:42,236] [INFO] [config.py:1011:print] amp_params ................... False +0: [2023-02-09 22:43:42,236] [INFO] [config.py:1011:print] autotuning_config ............ { +0: "enabled": false, +0: "start_step": null, +0: "end_step": null, +0: "metric_path": null, +0: "arg_mappings": null, +0: "metric": "throughput", +0: "model_info": null, +0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", +0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", +0: "overwrite": true, +0: "fast": true, +0: "start_profile_step": 3, +0: "end_profile_step": 5, +0: "tuner_type": "gridsearch", +0: "tuner_early_stopping": 5, +0: "tuner_num_trials": 50, +0: "model_info_path": null, +0: "mp_size": 1, +0: "max_train_batch_size": null, +0: "min_train_batch_size": 1, +0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, +0: "min_train_micro_batch_size_per_gpu": 1, +0: "num_tuning_micro_batch_sizes": 3 +0: } +0: [2023-02-09 22:43:42,236] [INFO] [config.py:1011:print] bfloat16_enabled ............. True +0: [2023-02-09 22:43:42,236] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] comms_config ................. +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] communication_data_type ...... None +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa +0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] curriculum_enabled ........... False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] curriculum_params ............ False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] dataloader_drop_last ......... False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] disable_allgather ............ False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] dump_state ................... False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ bert.encoder.layer +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] elasticity_enabled ........... False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] flops_profiler_config ........ { +0: "enabled": false, +0: "profile_step": 1, +0: "module_depth": -1, +0: "top_modules": 1, +0: "detailed": true, +0: "output_file": null +0: } +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] fp16_auto_cast ............... None +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] fp16_enabled ................. False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] global_rank .................. 0 +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 1 +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] load_universal_checkpoint .... False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] loss_scale ................... 1.0 +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] memory_breakdown ............. False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] monitor_config ............... +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] nebula_config ................ { +0: "enabled": false, +0: "persistent_storage_path": null, +0: "persistent_time_interval": 100, +0: "num_of_version_in_retention": 2, +0: "enable_nebula_load": true, +0: "load_path": null +0: } +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] optimizer_name ............... None +0: [2023-02-09 22:43:42,237] [INFO] [config.py:1011:print] optimizer_params ............. None +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] pld_enabled .................. False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] pld_params ................... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] prescale_gradients ........... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] scheduler_name ............... None +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] scheduler_params ............. None +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] sparse_attention ............. None +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] steps_per_print .............. 2000 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] train_batch_size ............. 256 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 4 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] use_node_local_storage ....... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] wall_clock_breakdown ......... False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] world_size ................... 64 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] zero_enabled ................. False +0: [2023-02-09 22:43:42,238] [INFO] [config.py:1011:print] zero_optimization_stage ...... 0 +0: [2023-02-09 22:43:42,238] [INFO] [config.py:996:print_user_config] json = { +0: "train_micro_batch_size_per_gpu": 4, +0: "train_batch_size": 256, +0: "gradient_clipping": 1.0, +0: "zero_optimization": { +0: "stage": 0 +0: }, +0: "bf16": { +0: "enabled": true +0: }, +0: "steps_per_print": 2.000000e+03, +0: "wall_clock_breakdown": false +0: } +0: Time to load utils op: 0.0004177093505859375 seconds +0: [2023-02-09 22:43:42,239] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=1 micro_batch_size=4 +0: [2023-02-09 22:43:42,249] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=17 [0, 17) STAGE_PARAMS=82741760 (82.742M) TOTAL_PARAMS=82741760 (82.742M) UNIQUE_PARAMS=82741760 (82.742M) +0: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +2: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +2: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +0: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +5: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +5: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +3: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +3: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +0: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +4: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +1: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt... +1: [2023-02-09 22:43:42,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +7: [2023-02-09 22:43:42,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/mp_rank_00_model_states.pt. +6: [2023-02-09 22:43:42,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,478] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:43:42,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:43:42,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:43:42,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:43:42,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:43:42,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:43:42,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:43:42,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:43:42,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:43:42,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:43:42,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:43:42,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:43:42,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:43:42,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:43:42,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,537] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,540] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,588] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,588] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:43:42,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:43:42,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:43:42,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:43:42,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:43:42,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:43:42,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:43:42,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:43:42,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,596] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,597] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,597] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,597] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:43:42,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:43:42,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,690] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:43:42,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,698] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,698] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,698] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,698] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,698] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,698] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,698] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,698] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,699] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:43:42,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:43:42,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:43:42,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:43:42,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:43:42,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:43:42,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:43:42,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:43:42,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:43:42,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:43:42,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:43:42,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:43:42,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:43:42,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:43:42,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:42,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:42,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:43,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:43:43,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:43:43,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:43:43,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:43:43,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:43:43,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:43:43,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:43:43,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:43,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:43,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:43,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:43,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:43,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:43,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:45,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:45,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:45,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:45,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:45,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:45,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:45,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:45,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:45,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:45,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:45,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:45,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:45,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:45,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:45,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:45,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:45,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:45,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:45,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:45,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:45,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:45,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:45,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:45,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:45,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:43:45,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:45,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:45,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,015] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:45,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:45,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:43:45,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:43:45,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:43:45,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:43:45,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,022] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:43:45,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:45,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:45,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:45,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:43:45,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:43:45,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:43:45,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:43:45,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:43:45,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:43:45,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:43:45,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:43:45,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,203] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,207] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,208] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,209] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:43:45,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:43:45,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:43:45,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: > overriding learning rate value to 0.0002[2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: > overriding minimum learning rate value to 2e-05 +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: > overriding warmup iterations value to 0 +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: > overriding total number of iterations value to 1 +0: > overriding decay style value to cosine +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:43:45,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:43:45,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:43:45,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:43:45,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:43:45,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:43:45,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:43:45,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,376] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 21 +2: [2023-02-09 22:43:45,377] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 21 +3: [2023-02-09 22:43:45,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:45,377] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 31 +3: [2023-02-09 22:43:45,379] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 31 +5: [2023-02-09 22:43:45,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,379] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 45 +5: [2023-02-09 22:43:45,381] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 45 +7: [2023-02-09 22:43:45,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,391] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 56 +7: [2023-02-09 22:43:45,392] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 56 +0: [2023-02-09 22:43:45,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:43:45,392] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 0 +3: [2023-02-09 22:43:45,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:45,393] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 26 +0: [2023-02-09 22:43:45,394] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 0 +4: [2023-02-09 22:43:45,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +0: could not find arguments in the checkpoint ... +0: checkpoint version 3.0 +4: [2023-02-09 22:43:45,394] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 33 +3: [2023-02-09 22:43:45,394] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 26 +4: [2023-02-09 22:43:45,395] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 33 +5: [2023-02-09 22:43:45,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,396] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 40 +5: [2023-02-09 22:43:45,397] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 40 +3: [2023-02-09 22:43:45,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:45,399] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 24 +3: [2023-02-09 22:43:45,400] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 24 +0: [2023-02-09 22:43:45,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:43:45,401] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 1 +5: [2023-02-09 22:43:45,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,401] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 41 +0: [2023-02-09 22:43:45,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:43:45,402] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 5 +0: [2023-02-09 22:43:45,402] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 1 +5: [2023-02-09 22:43:45,402] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 41 +0: [2023-02-09 22:43:45,403] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 5 +2: [2023-02-09 22:43:45,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,406] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 23 +2: [2023-02-09 22:43:45,406] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 16 +1: [2023-02-09 22:43:45,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:43:45,407] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 9 +7: [2023-02-09 22:43:45,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:43:45,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,407] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 63 +7: [2023-02-09 22:43:45,407] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 59 +1: [2023-02-09 22:43:45,407] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 13 +2: [2023-02-09 22:43:45,407] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 23 +2: [2023-02-09 22:43:45,407] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 16 +4: [2023-02-09 22:43:45,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:45,408] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 37 +1: [2023-02-09 22:43:45,408] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 9 +1: [2023-02-09 22:43:45,408] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 13 +7: [2023-02-09 22:43:45,408] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 63 +7: [2023-02-09 22:43:45,408] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 59 +2: [2023-02-09 22:43:45,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,409] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 18 +4: [2023-02-09 22:43:45,409] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 37 +4: [2023-02-09 22:43:45,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:45,410] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 34 +1: [2023-02-09 22:43:45,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,410] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 18 +1: [2023-02-09 22:43:45,410] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 12 +3: [2023-02-09 22:43:45,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:45,411] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 28 +4: [2023-02-09 22:43:45,411] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 34 +3: [2023-02-09 22:43:45,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:45,411] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 30 +3: [2023-02-09 22:43:45,412] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 28 +1: [2023-02-09 22:43:45,412] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 12 +7: [2023-02-09 22:43:45,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,412] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 58 +3: [2023-02-09 22:43:45,412] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 30 +7: [2023-02-09 22:43:45,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,413] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 62 +0: [2023-02-09 22:43:45,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,414] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 58 +0: [2023-02-09 22:43:45,414] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 2 +7: [2023-02-09 22:43:45,414] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 62 +1: [2023-02-09 22:43:45,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:43:45,414] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 11 +0: [2023-02-09 22:43:45,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:43:45,415] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 2 +5: [2023-02-09 22:43:45,415] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 46 +0: [2023-02-09 22:43:45,415] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 4 +5: [2023-02-09 22:43:45,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:43:45,415] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 11 +5: [2023-02-09 22:43:45,416] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 47 +0: [2023-02-09 22:43:45,416] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 4 +5: [2023-02-09 22:43:45,416] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 46 +2: [2023-02-09 22:43:45,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,417] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 20 +5: [2023-02-09 22:43:45,417] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 47 +2: [2023-02-09 22:43:45,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,417] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 22 +1: [2023-02-09 22:43:45,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,418] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 20 +1: [2023-02-09 22:43:45,418] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 10 +2: [2023-02-09 22:43:45,418] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 22 +1: [2023-02-09 22:43:45,419] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 10 +7: [2023-02-09 22:43:45,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,419] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 57 +7: [2023-02-09 22:43:45,420] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 57 +6: [2023-02-09 22:43:45,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,421] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 49 +6: [2023-02-09 22:43:45,422] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 49 +5: [2023-02-09 22:43:45,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,423] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 42 +6: [2023-02-09 22:43:45,423] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 50 +3: [2023-02-09 22:43:45,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:45,424] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 25 +5: [2023-02-09 22:43:45,424] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 42 +6: [2023-02-09 22:43:45,424] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 50 +6: [2023-02-09 22:43:45,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,425] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 53 +6: [2023-02-09 22:43:45,425] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 52 +0: [2023-02-09 22:43:45,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:45,425] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 25 +0: [2023-02-09 22:43:45,425] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 6 +6: [2023-02-09 22:43:45,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:43:45,426] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 6 +6: [2023-02-09 22:43:45,426] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 48 +6: [2023-02-09 22:43:45,426] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 53 +6: [2023-02-09 22:43:45,426] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 52 +6: [2023-02-09 22:43:45,428] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 48 +1: [2023-02-09 22:43:45,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:45,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:45,432] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 36 +1: [2023-02-09 22:43:45,431] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 8 +4: [2023-02-09 22:43:45,432] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:45,432] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 32 +1: [2023-02-09 22:43:45,433] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 8 +4: [2023-02-09 22:43:45,433] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 36 +4: [2023-02-09 22:43:45,433] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 32 +4: [2023-02-09 22:43:45,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:45,435] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 39 +5: [2023-02-09 22:43:45,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,435] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 44 +4: [2023-02-09 22:43:45,436] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 39 +5: [2023-02-09 22:43:45,436] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 44 +5: [2023-02-09 22:43:45,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:43:45,440] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 43 +7: [2023-02-09 22:43:45,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,441] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 61 +5: [2023-02-09 22:43:45,441] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 43 +6: [2023-02-09 22:43:45,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,442] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 51 +7: [2023-02-09 22:43:45,442] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 61 +1: [2023-02-09 22:43:45,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,443] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 51 +1: [2023-02-09 22:43:45,443] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 15 +1: [2023-02-09 22:43:45,444] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 15 +6: [2023-02-09 22:43:45,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,445] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 54 +6: [2023-02-09 22:43:45,446] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 54 +0: [2023-02-09 22:43:45,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:43:45,448] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 3 +0: [2023-02-09 22:43:45,450] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 3 +4: [2023-02-09 22:43:45,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:45,451] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 35 +0: [2023-02-09 22:43:45,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:43:45,452] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 7 +6: [2023-02-09 22:43:45,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:43:45,452] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 55 +4: [2023-02-09 22:43:45,452] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 35 +0: [2023-02-09 22:43:45,453] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 7 +6: [2023-02-09 22:43:45,453] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 55 +2: [2023-02-09 22:43:45,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,457] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 19 +2: [2023-02-09 22:43:45,458] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 19 +1: [2023-02-09 22:43:45,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:43:45,464] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 14 +1: [2023-02-09 22:43:45,466] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 14 +2: [2023-02-09 22:43:45,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:43:45,473] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 17 +2: [2023-02-09 22:43:45,474] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 17 +4: [2023-02-09 22:43:45,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:43:45,476] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 38 +4: [2023-02-09 22:43:45,477] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 38 +7: [2023-02-09 22:43:45,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:43:45,835] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 60 +7: [2023-02-09 22:43:45,836] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 60 +3: [2023-02-09 22:43:46,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:46,505] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 27 +3: [2023-02-09 22:43:46,506] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 27 +3: [2023-02-09 22:43:46,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_83m20b20b/global_step37905/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:43:46,512] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 29 +3: [2023-02-09 22:43:46,513] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 29 +0: successfully loaded checkpoint from checkpoints_83m20b20b at iteration 0 +7: time (ms) | load-checkpoint: 4266.01 +0: estimated model parameters: 0.08274176 +0: estimated model parameters without embeddings: 0.04923648 +0: [after model, optimizer, and learning rate scheduler are built] datetime: 2023-02-09 22:43:46 +0: > building train, validation, and test datasets ... +0: > datasets target sizes (minimum size): +0: train: 1 +0: validation: 25600 +0: test: 25600 +0: > building train, validation, and test datasets for GPT ... +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.006724 seconds +0: number of documents: 41786294 +0: > dataset split: +0: train: +0: document indices in [0, 41786294) total of 41786294 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document_train_indexmap_1ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document_train_indexmap_1ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document_train_indexmap_1ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.110 seconds +0: total number of samples: 9767463 +0: total number of epochs: 1 +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.038608 seconds +0: number of documents: 364608 +0: > dataset split: +0: validation: +0: document indices in [0, 364608) total of 364608 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.080 seconds +0: total number of samples: 84978 +0: total number of epochs: 1 +0: > finished creating GPT datasets ... +0: [after dataloaders are built] datetime: 2023-02-09 22:44:00 +0: done with setup ... +0: training ... +7: time (ms) | model-and-optimizer-setup: 20308.22 | train/valid/test-data-iterators-setup: 13164.06 +0: [after training is done] datetime: 2023-02-09 22:44:00 +0: [2023-02-09 22:44:01,220] [INFO] [checkpointing.py:553:forward] Activation Checkpointing Information +0: [2023-02-09 22:44:01,220] [INFO] [checkpointing.py:554:forward] ----Partition Activations False, CPU CHECKPOINTING False +0: [2023-02-09 22:44:01,220] [INFO] [checkpointing.py:557:forward] ----contiguous Memory Checkpointing False with None total layers +0: [2023-02-09 22:44:01,220] [INFO] [checkpointing.py:560:forward] ----Synchronization False +0: [2023-02-09 22:44:01,220] [INFO] [checkpointing.py:561:forward] ----Profiling time in checkpointing False +7: ----------------------------------------------------------------------------------------------------------------- +7: validation loss at the end of training for val data | lm loss value: 3.608018E+00 | lm loss PPL: 3.689286E+01 | +7: ----------------------------------------------------------------------------------------------------------------- +END 2820863: Thu 09 Feb 2023 10:44:21 PM EET diff --git a/83m20b20b/sbatch_83m20b20b.sh b/83m20b20b/sbatch_83m20b20b.sh new file mode 100755 index 0000000000000000000000000000000000000000..1b8bc0607df59bea65af78ed6377dc5578596513 --- /dev/null +++ b/83m20b20b/sbatch_83m20b20b.sh @@ -0,0 +1,168 @@ +#!/bin/bash +#SBATCH --exclude=nid007571,nid007112,nid006774,nid007502,nid007506,nid007507,nid005145,nid006692,nid007218,nid007123,nid006124,nid006123,nid007496,nid007237,nid006852,nid007206,nid006947,nid007212,nid006977,nid007222,nid005444,nid007219,nid007493,nid007221,nid005300,nid005619,nid006118,nid005203,nid006113,nid006481,nid007077,nid005208,nid005207,nid005879,nid005901 +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 2-0:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=83m20b20b + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT +mkdir -p $CHECKPOINT_PATH +mkdir -p $TENSORBOARD_PATH + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train20b.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_74M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 19873180000 +# -> Samples: 9703701 +TRAIN_SAMPLES=9_703_701 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 97_037 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --loss-scale 12 \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + --checkpoint-activations \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/83m20b20b/sbatch_83m20b20bval.sh b/83m20b20b/sbatch_83m20b20bval.sh new file mode 100644 index 0000000000000000000000000000000000000000..c3a38115ecaa561678ab653a073e09b9a1128e83 --- /dev/null +++ b/83m20b20b/sbatch_83m20b20bval.sh @@ -0,0 +1,173 @@ +#!/bin/bash +#SBATCH --exclude=nid007571,nid007112,nid006774,nid007502,nid007506,nid007507,nid005145,nid006692,nid007218,nid007123,nid006124,nid006123,nid007496,nid007237,nid006852,nid007206,nid006947,nid007212,nid006977,nid007222,nid005444,nid007219,nid007493,nid007221,nid005300,nid005619,nid006118,nid005203,nid006113,nid006481,nid007077,nid005208,nid005207,nid005879,nid005901 +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 2-0:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=83m20b20bval +VARIANT_CKPT=83m20b20b + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT_CKPT +TENSORBOARD_PATH=tensorboard_$VARIANT +mkdir -p $CHECKPOINT_PATH +mkdir -p $TENSORBOARD_PATH + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train20b.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_20B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_74M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 19873180000 +# -> Samples: 9703701 +TRAIN_SAMPLES=1 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 0 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + --no-load-optim \ + --reset-progress \ + --override-lr-scheduler \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --loss-scale 12 \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + --checkpoint-activations \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1 \ + --eval-iters 100 \ + --eval-only true \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/83m20b20b/tensorboard_83m20b20bval/events.out.tfevents.1675555121.nid005497.115440.0 b/83m20b20b/tensorboard_83m20b20bval/events.out.tfevents.1675555121.nid005497.115440.0 new file mode 100644 index 0000000000000000000000000000000000000000..329e372dab156bd5f78d34cb49f506d8eb62e80e --- /dev/null +++ b/83m20b20b/tensorboard_83m20b20bval/events.out.tfevents.1675555121.nid005497.115440.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5282cd87a28bcafe9bf88b2ae3583190b19077560709ba148d3a8579af57b304 +size 980 diff --git a/83m20b20b/tensorboard_83m20b20bval/events.out.tfevents.1675975385.nid006025.100490.0 b/83m20b20b/tensorboard_83m20b20bval/events.out.tfevents.1675975385.nid006025.100490.0 new file mode 100644 index 0000000000000000000000000000000000000000..d8d6d88c6dd5f6df36caf17b5b813b207b465b8c --- /dev/null +++ b/83m20b20b/tensorboard_83m20b20bval/events.out.tfevents.1675975385.nid006025.100490.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c758d4b798066877c48375b84599fd3d4609f89a55467657f53234ec800f8617 +size 980 diff --git a/83m20b20b/transformers/config.json b/83m20b20b/transformers/config.json new file mode 100644 index 0000000000000000000000000000000000000000..553a76e467e8e403cf19f94f84cd1613f403f329 --- /dev/null +++ b/83m20b20b/transformers/config.json @@ -0,0 +1 @@ +{"vocab_size": 50304, "n_positions": 2048, "n_embd": 640, "n_layer": 10, "n_head": 10, "n_inner": 2560, "activation_function": "gelu", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": null, "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "chunk_size_feed_forward": 0, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["GPT2LMHeadModel"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "pad_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "", "transformers_version": "4.25.0.dev0", "n_ctx": 1024, "gradient_checkpointing": false, "model_type": "gpt2"} \ No newline at end of file diff --git a/83m20b20b/transformers/pytorch_model.bin b/83m20b20b/transformers/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..4474489a01cddab379ddecb9aad3ef8160ac5a47 --- /dev/null +++ b/83m20b20b/transformers/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e695b28084ec1067ccecbdb07f243d531d2e02efae9925a10d75e5cf2cbcf76 +size 249414221