Delete t0-v4/logs
- t0-v4/logs/initial_config.json +0 -85
- t0-v4/logs/log.txt +0 -138
t0-v4/logs/initial_config.json
DELETED
@@ -1,85 +0,0 @@
-{
-    "debug": false,
-    "project_name": "chatgpt-instruction-nomic-embedding",
-    "name": "t0-v4",
-    "project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
-    "data_dir": "/nas-hdd/prateek/data",
-    "output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
-    "config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
-    "seed": 42,
-    "hf_write_token": null,
-    "origin_model": "google/t5-xl-lm-adapt",
-    "model_class": "seq2seq_lm",
-    "model_type": "encdec",
-    "peft_type": "lora",
-    "load_model_dtype": "float32",
-    "val_fraction": 0.2,
-    "dataset": [
-        "t0"
-    ],
-    "eval_dataset": null,
-    "eval_split": "val",
-    "num_steps": 1500,
-    "effective_train_batch_size": 128,
-    "patience": 3,
-    "verbose": false,
-    "do_test": false,
-    "eval_steps": 100,
-    "save_last": true,
-    "save_best": true,
-    "logging_steps": 5,
-    "gradient_checkpointing": false,
-    "moe_inference": false,
-    "inference_batch_size_scale": 1,
-    "checkpoint_dir_or_path": null,
-    "cl_checkpoint_path": null,
-    "load_checkpoint_dataset": null,
-    "ae_checkpoint_dir": null,
-    "init_datasets": [
-        "t0-cl-init1"
-    ],
-    "selected_expert_ids": null,
-    "merge_num_clusters": null,
-    "global_clustering": false,
-    "hierarchical_num_clusters": null,
-    "hierarchical_cluster_token_routing": false,
-    "save_router_state_dict": false,
-    "bias_router_embedding_path": null,
-    "bias_input_embedding_path": null,
-    "optimizer": "adamw",
-    "lr": 0.003,
-    "trainable_param_names": ".*lora.*",
-    "scheduler": "linear_decay_with_warmup",
-    "warmup_steps": null,
-    "warmup_ratio": 0.02,
-    "weight_decay": 0,
-    "scale_parameter": true,
-    "mix_precision": "bf16",
-    "gradient_clipping": 1.0,
-    "target_modules": "all-linear",
-    "lora_rank": 16,
-    "lora_alpha": 1,
-    "lora_dropout": 0.0,
-    "use_rslora": false,
-    "init_lora_weights": true,
-    "lora_bias": "none",
-    "moe_router_aux_loss_coef": 0.0,
-    "moe_top_k": 2,
-    "moe_top_p": 1.0,
-    "moe_reweight_output": true,
-    "bias_routing_scale": 0,
-    "bias_routing_dim": -1,
-    "lora_init_method": "usage-based",
-    "gate_init_method": "zero",
-    "zeroshot_tolerance": 0.05,
-    "upper_bound_tolerance": 0.05,
-    "single_lora_gate_train_steps": 200,
-    "molora_gate_train_samples": 1000,
-    "molora_gate_train_steps": 100,
-    "layer_norm_after_train_single_lora": true,
-    "cpu_cont": 96,
-    "run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4",
-    "log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4/logs",
-    "prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4/prediction",
-    "checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4/checkpoints"
-}
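For reference, a minimal sketch of how a saved run config like the one deleted above could be read back in Python. Only the standard json module is assumed; the helper name load_run_config and the Namespace wrapper are illustrative, not the repository's actual config loader.

    import json
    from argparse import Namespace

    def load_run_config(path: str) -> Namespace:
        """Read a saved run config (e.g. initial_config.json) into attribute-style access."""
        with open(path, "r") as f:
            cfg = json.load(f)
        return Namespace(**cfg)

    # Inspect a few of the LoRA-related fields recorded in the file above.
    config = load_run_config("t0-v4/logs/initial_config.json")
    print(config.origin_model)                                 # "google/t5-xl-lm-adapt"
    print(config.peft_type, config.lora_rank, config.lora_alpha)  # "lora" 16 1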
t0-v4/logs/log.txt
DELETED
@@ -1,138 +0,0 @@
-2024-09-11 14:17:11,625 - log.txt - [INFO] - Start experiment chatgpt-instruction-nomic-embedding/t0-v4
-2024-09-11 14:17:11,625 - log.txt - [INFO] - {
-    "debug": false,
-    "project_name": "chatgpt-instruction-nomic-embedding",
-    "name": "t0-v4",
-    "project_dir": "/home/pingzhi/phatgoose-cl/src_simple",
-    "data_dir": "/nas-hdd/prateek/data",
-    "output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs",
-    "config_dir": "/home/pingzhi/phatgoose-cl/src_simple/configs",
-    "seed": 42,
-    "hf_write_token": null,
-    "origin_model": "google/t5-xl-lm-adapt",
-    "model_class": "seq2seq_lm",
-    "model_type": "encdec",
-    "peft_type": "lora",
-    "load_model_dtype": "float32",
-    "val_fraction": 0.2,
-    "dataset": [
-        "t0"
-    ],
-    "eval_dataset": null,
-    "eval_split": "val",
-    "num_steps": 1500,
-    "effective_train_batch_size": 128,
-    "patience": 3,
-    "verbose": false,
-    "do_test": false,
-    "eval_steps": 100,
-    "save_last": true,
-    "save_best": true,
-    "logging_steps": 5,
-    "gradient_checkpointing": false,
-    "moe_inference": false,
-    "inference_batch_size_scale": 1,
-    "checkpoint_dir_or_path": null,
-    "cl_checkpoint_path": null,
-    "load_checkpoint_dataset": null,
-    "ae_checkpoint_dir": null,
-    "init_datasets": [
-        "t0-cl-init1"
-    ],
-    "selected_expert_ids": null,
-    "merge_num_clusters": null,
-    "global_clustering": false,
-    "hierarchical_num_clusters": null,
-    "hierarchical_cluster_token_routing": false,
-    "save_router_state_dict": false,
-    "bias_router_embedding_path": null,
-    "bias_input_embedding_path": null,
-    "optimizer": "adamw",
-    "lr": 0.003,
-    "trainable_param_names": ".*lora.*",
-    "scheduler": "linear_decay_with_warmup",
-    "warmup_steps": null,
-    "warmup_ratio": 0.02,
-    "weight_decay": 0,
-    "scale_parameter": true,
-    "mix_precision": "bf16",
-    "gradient_clipping": 1.0,
-    "target_modules": "all-linear",
-    "lora_rank": 16,
-    "lora_alpha": 1,
-    "lora_dropout": 0.0,
-    "use_rslora": false,
-    "init_lora_weights": true,
-    "lora_bias": "none",
-    "moe_router_aux_loss_coef": 0.0,
-    "moe_top_k": 2,
-    "moe_top_p": 1.0,
-    "moe_reweight_output": true,
-    "bias_routing_scale": 0,
-    "bias_routing_dim": -1,
-    "lora_init_method": "usage-based",
-    "gate_init_method": "zero",
-    "zeroshot_tolerance": 0.05,
-    "upper_bound_tolerance": 0.05,
-    "single_lora_gate_train_steps": 200,
-    "molora_gate_train_samples": 1000,
-    "molora_gate_train_steps": 100,
-    "layer_norm_after_train_single_lora": true,
-    "cpu_cont": 96,
-    "run_output_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4",
-    "log_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4/logs",
-    "prediction_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4/prediction",
-    "checkpoint_dir": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4/checkpoints",
-    "finish_flag_file": "/home/pingzhi/phatgoose-cl/src_simple/saved_runs/chatgpt-instruction-nomic-embedding/t0-v4/exp_completed.txt"
-}
-2024-09-11 14:17:18,316 - log.txt - [INFO] - Tasks ['p3socialiqa', 'p3wiqa', 'p3cosmosqa', 'p3quail', 'p3quartz', 'p3qasc', 'p3commonsenseqa', 'p3quarel', 'p3dream', 'p3sciq', 'p3wikihop', 'p3ropes', 'p3adversarialqa', 'p3duorc', 'p3quoref', 'p3hotpotqa', 'p3wikiqa', 'p3amazonpolarity', 'p3appreviews', 'p3rottentomatoes', 'p3imdb', 'p3yelp', 'p3agnews', 'p3dbpedia14', 'p3trec', 'p3wikibio', 'p3commongen', 'p3cnndailymail', 'p3multinews', 'p3gigaword', 'p3samsum', 'p3xsum', 'p3paws', 'p3qqp', 'p3mrpc', 'p3hswag', 'p3copa', 'p3storycloze', 'p3cb', 'p3rte', 'p3anlir1', 'p3anlir2', 'p3anlir3', 'p3winogrande', 'p3wscfixed', 'p3wic']
-2024-09-11 14:17:19,935 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'social_i_qa'] Num Templates: 4 Datasize 128
-2024-09-11 14:17:22,070 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'wiqa'] Num Templates: 2 Datasize 128
-2024-09-11 14:17:24,069 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'cosmos_qa'] Num Templates: 10 Datasize 128
-2024-09-11 14:17:26,907 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quail'] Num Templates: 10 Datasize 128
-2024-09-11 14:17:29,561 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quartz'] Num Templates: 8 Datasize 128
-2024-09-11 14:17:31,981 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'qasc'] Num Templates: 5 Datasize 128
-2024-09-11 14:17:34,492 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'commonsense_qa'] Num Templates: 4 Datasize 128
-2024-09-11 14:17:36,947 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quarel'] Num Templates: 5 Datasize 128
-2024-09-11 14:17:38,760 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'dream'] Num Templates: 2 Datasize 128
-2024-09-11 14:17:41,104 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'sciq'] Num Templates: 4 Datasize 128
-2024-09-11 14:17:42,704 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'MoE-UNC/wikihop'] Num Templates: 5 Datasize 128
-2024-09-11 14:17:45,187 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'ropes'] Num Templates: 10 Datasize 128
-2024-09-11 14:17:47,879 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'adversarial_qa', 'adversarialQA'] Num Templates: 4 Datasize 128
-2024-09-11 14:17:52,220 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'duorc', 'ParaphraseRC'] Num Templates: 5 Datasize 128
-2024-09-11 14:17:54,156 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'quoref'] Num Templates: 10 Datasize 128
-2024-09-11 14:17:55,658 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'hotpot_qa', 'fullwiki'] Num Templates: 5 Datasize 128
-2024-09-11 14:17:59,481 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'wiki_qa'] Num Templates: 5 Datasize 128
-2024-09-11 14:18:03,108 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'amazon_polarity'] Num Templates: 9 Datasize 128
-2024-09-11 14:18:05,068 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'app_reviews'] Num Templates: 1 Datasize 128
-2024-09-11 14:18:06,970 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'rotten_tomatoes'] Num Templates: 10 Datasize 128
-2024-09-11 14:18:09,884 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'imdb'] Num Templates: 10 Datasize 128
-2024-09-11 14:18:12,193 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'yelp_review_full'] Num Templates: 7 Datasize 128
-2024-09-11 14:18:14,277 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'ag_news'] Num Templates: 7 Datasize 128
-2024-09-11 14:18:16,545 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'dbpedia_14'] Num Templates: 4 Datasize 128
-2024-09-11 14:18:18,280 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'trec'] Num Templates: 1 Datasize 100
-2024-09-11 14:18:21,112 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'wiki_bio'] Num Templates: 1 Datasize 128
-2024-09-11 14:18:23,877 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'common_gen'] Num Templates: 6 Datasize 128
-2024-09-11 14:18:27,213 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'cnn_dailymail', '3.0.0'] Num Templates: 7 Datasize 128
-2024-09-11 14:18:29,546 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'multi_news'] Num Templates: 5 Datasize 128
-2024-09-11 14:18:32,866 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'gigaword'] Num Templates: 7 Datasize 128
-2024-09-11 14:18:35,238 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'samsum'] Num Templates: 6 Datasize 128
-2024-09-11 14:18:37,088 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'xsum'] Num Templates: 10 Datasize 128
-2024-09-11 14:18:40,582 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'paws', 'labeled_final'] Num Templates: 11 Datasize 128
-2024-09-11 14:18:43,367 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'glue', 'qqp'] Num Templates: 5 Datasize 128
-2024-09-11 14:18:45,352 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'glue', 'mrpc'] Num Templates: 5 Datasize 128
-2024-09-11 14:18:47,112 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'hellaswag'] Num Templates: 4 Datasize 128
-2024-09-11 14:18:48,387 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'copa'] Num Templates: 8 Datasize 128
-2024-09-11 14:18:49,854 - root - [WARNING] - Tried instantiating `DatasetTemplates` for MoE-UNC/story_cloze, but no prompts found. Please ignore this warning if you are creating new prompts for this dataset.
-2024-09-11 14:18:49,886 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'MoE-UNC/story_cloze'] Num Templates: 5 Datasize 128
-2024-09-11 14:18:51,053 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'cb'] Num Templates: 15 Datasize 128
-2024-09-11 14:18:52,171 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'rte'] Num Templates: 10 Datasize 128
-2024-09-11 14:18:56,259 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'anli'] Num Templates: 15 Datasize 128
-2024-09-11 14:18:59,502 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'anli'] Num Templates: 15 Datasize 128
-2024-09-11 14:19:03,139 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'anli'] Num Templates: 15 Datasize 128
-2024-09-11 14:19:04,440 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'winogrande', 'winogrande_xl'] Num Templates: 5 Datasize 128
-2024-09-11 14:19:05,559 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'wsc.fixed'] Num Templates: 10 Datasize 128
-2024-09-11 14:19:06,690 - log.txt - [INFO] - Val Dataset Path: ['huggingface', 'super_glue', 'wic'] Num Templates: 10 Datasize 128
-2024-09-11 14:19:06,926 - sentence_transformers.SentenceTransformer - [INFO] - Use pytorch device_name: cuda
-2024-09-11 14:19:06,926 - sentence_transformers.SentenceTransformer - [INFO] - Load pretrained SentenceTransformer: nomic-ai/nomic-embed-text-v1.5
-2024-09-11 14:19:09,181 - transformers_modules.nomic-ai.nomic-bert-2048.4bb68f63016e88e53e48df904c6ab4e6f718e198.modeling_hf_nomic_bert - [WARNING] - <All keys matched successfully>
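The deleted log lines follow the pattern "timestamp - logger name - [level] - message". A minimal sketch of a Python logging setup that reproduces this format is shown below; the repository's actual logger configuration is not part of this diff, so the handler wiring here is an assumption that only matches the visible line format.

    import logging

    # Reproduce the line format seen above, e.g.:
    #   2024-09-11 14:17:11,625 - log.txt - [INFO] - Start experiment ...
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - [%(levelname)s] - %(message)s")
    )

    logger = logging.getLogger("log.txt")  # the logger appears to be named after the log file
    logger.setLevel(logging.INFO)
    logger.addHandler(handler)

    logger.info("Start experiment chatgpt-instruction-nomic-embedding/t0-v4")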