dminhvu02 committed
Commit 55e3b27 · verified · 1 Parent(s): ddc1980

Upload folder using huggingface_hub

.ipynb_checkpoints/tokenizer_config-checkpoint.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<unk>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false,
+   "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '</s>'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
+ }
README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - PEFT 0.4.0
adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "/model_zoo/Vivid-7B-base",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 128,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "revision": null,
+   "target_modules": [
+     "up_proj",
+     "o_proj",
+     "k_proj",
+     "q_proj",
+     "v_proj",
+     "down_proj",
+     "gate_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
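
The adapter applies LoRA (r = 64, lora_alpha = 128, so a scaling of alpha/r = 2) to all seven Mistral projection modules, attention and MLP alike. A minimal sketch of how an adapter with this config is typically attached using peft; "/model_zoo/Vivid-7B-base" is just the local path recorded above, and "path/to/this/repo" is a placeholder for the downloaded adapter folder:

```python
# Sketch: attach this LoRA adapter to its base model with peft.
# Both paths below are assumptions taken from adapter_config.json,
# not hosted checkpoints; substitute your own local copies.
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "/model_zoo/Vivid-7B-base",          # base_model_name_or_path from the config
    torch_dtype=torch.bfloat16,
)
model = PeftModel.from_pretrained(base, "path/to/this/repo")  # reads adapter_config.json + adapter_model.bin
model = model.merge_and_unload()  # optional: fold the LoRA deltas back into the base weights
```

Note that config.json below carries LLaMA-VID-style multimodal fields, so the full video pipeline needs that project's own loading code; this sketch covers only the language-model side.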
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:578129979aec206237de8cc7357798625889bfc1fea97b7d288cee1e90cdca4c
+ size 335700042
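
adapter_model.bin is stored as a Git LFS pointer: the three lines above are the entire file in git, while the ~335 MB payload lives in LFS storage, addressed by its SHA-256. Once downloaded, the blob can be checked against the oid; a small self-contained sketch (the local filename is an assumption):

```python
# Verify a downloaded Git LFS blob against the sha256 oid in its pointer file.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected digest from the pointer above.
EXPECTED = "578129979aec206237de8cc7357798625889bfc1fea97b7d288cee1e90cdca4c"
assert sha256_of("adapter_model.bin") == EXPECTED, "download is corrupt or incomplete"
```

The same check applies to non_lora_trainables.bin and tokenizer.model below, each with its own oid.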
config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "_name_or_path": "/model_zoo/Vivid-7B-base",
+   "architectures": [
+     "MistralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bert_type": "qformer_layer:12",
+   "bos_token_id": 1,
+   "compress_type": "mean",
+   "eos_token_id": 2,
+   "freeze_mm_mlp_adapter": false,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "image_aspect_ratio": "pad",
+   "image_grid_pinpoints": null,
+   "image_processor": "./llamavid/processor/intern-vit",
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 32768,
+   "max_token": 8192,
+   "mm_hidden_size": 1024,
+   "mm_projector_type": "mlp2x_gelu",
+   "mm_use_im_patch_token": false,
+   "mm_use_im_start_end": false,
+   "mm_vision_select_feature": "patch",
+   "mm_vision_select_layer": -1,
+   "mm_vision_tower": "/model_zoo/intern-vit",
+   "model_type": "mistral",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "num_query": 32,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 10000.0,
+   "sliding_window": 4096,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.38.2",
+   "tune_mm_mlp_adapter": false,
+   "use_cache": true,
+   "use_mm_proj": true,
+   "vocab_size": 48384
+ }
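
Alongside the stock Mistral fields, the config carries LLaMA-VID-style multimodal keys (bert_type, compress_type, num_query, the mm_* group) and an extended vocab_size of 48384. The attention geometry follows directly from the standard fields; plain arithmetic, no assumptions beyond the values above:

```python
# Attention geometry implied by config.json above.
hidden_size = 4096
num_attention_heads = 32
num_key_value_heads = 8

head_dim = hidden_size // num_attention_heads                # 128
queries_per_kv = num_attention_heads // num_key_value_heads  # 4 query heads share each KV head (GQA)
kv_proj_dim = num_key_value_heads * head_dim                 # 1024-dim K and V projections

print(head_dim, queries_per_kv, kv_proj_dim)  # 128 4 1024
```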
non_lora_trainables.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:23fd0da8119435c380bd1c6eb4d2dccb2648b790293f90e2a3fbe33b7de10c3c
+ size 469482528
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<unk>",
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4d88bdadaa2a065aa7c6e18a4b5999ce4c76cec14d9fea882102e7b4931d7ef0
+ size 779539
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<unk>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false,
+   "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '</s>'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
+ }
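
The chat_template is a small Jinja2 program: it emits bos_token once, renders every message as <|im_start|>{role}\n{content}</s> (a ChatML-style start marker, but the plain </s> EOS as the end marker), and appends a bare assistant header when add_generation_prompt is set. It can be exercised without any of the tokenizer files; a sketch rendering the exact template string with jinja2 (the example messages are made up):

```python
# Render the chat_template above directly with Jinja2.
from jinja2 import Template

CHAT_TEMPLATE = (
    "{% if not add_generation_prompt is defined %}"
    "{% set add_generation_prompt = false %}{% endif %}"
    "{{ bos_token }}{% for message in messages %}"
    "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '</s>'}}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
)

messages = [{"role": "user", "content": "Describe the video."}]  # hypothetical input
prompt = Template(CHAT_TEMPLATE).render(
    bos_token="<s>", messages=messages, add_generation_prompt=True
)
print(prompt)
# <s><|im_start|>user
# Describe the video.</s><|im_start|>assistant
```

With the repo files on disk, AutoTokenizer.from_pretrained(...).apply_chat_template(messages, add_generation_prompt=True, tokenize=False) should produce the same string.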
trainer_state.json ADDED
@@ -0,0 +1,212 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.9788235294117648,
+   "eval_steps": 500,
+   "global_step": 26,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.04,
+       "grad_norm": 4.103447222745244,
+       "learning_rate": 0.0001,
+       "loss": 1.5918,
+       "step": 1
+     },
+     {
+       "epoch": 0.08,
+       "grad_norm": 3.930916372129095,
+       "learning_rate": 9.96057350657239e-05,
+       "loss": 1.6299,
+       "step": 2
+     },
+     {
+       "epoch": 0.11,
+       "grad_norm": 4.398070948517042,
+       "learning_rate": 9.842915805643155e-05,
+       "loss": 1.6372,
+       "step": 3
+     },
+     {
+       "epoch": 0.15,
+       "grad_norm": 2.17279784653199,
+       "learning_rate": 9.648882429441257e-05,
+       "loss": 1.5483,
+       "step": 4
+     },
+     {
+       "epoch": 0.19,
+       "grad_norm": 2.251687596185326,
+       "learning_rate": 9.381533400219318e-05,
+       "loss": 1.6069,
+       "step": 5
+     },
+     {
+       "epoch": 0.23,
+       "grad_norm": 2.1869658645951886,
+       "learning_rate": 9.045084971874738e-05,
+       "loss": 1.4985,
+       "step": 6
+     },
+     {
+       "epoch": 0.26,
+       "grad_norm": 1.7267817051984844,
+       "learning_rate": 8.644843137107059e-05,
+       "loss": 1.4424,
+       "step": 7
+     },
+     {
+       "epoch": 0.3,
+       "grad_norm": 1.9101891237074138,
+       "learning_rate": 8.18711994874345e-05,
+       "loss": 1.3213,
+       "step": 8
+     },
+     {
+       "epoch": 0.34,
+       "grad_norm": 1.6330633181587038,
+       "learning_rate": 7.679133974894983e-05,
+       "loss": 1.4426,
+       "step": 9
+     },
+     {
+       "epoch": 0.38,
+       "grad_norm": 1.6305240585237435,
+       "learning_rate": 7.128896457825364e-05,
+       "loss": 1.3071,
+       "step": 10
+     },
+     {
+       "epoch": 0.41,
+       "grad_norm": 1.3944693110209463,
+       "learning_rate": 6.545084971874738e-05,
+       "loss": 1.3665,
+       "step": 11
+     },
+     {
+       "epoch": 0.45,
+       "grad_norm": 1.4126280206859627,
+       "learning_rate": 5.9369065729286245e-05,
+       "loss": 1.2878,
+       "step": 12
+     },
+     {
+       "epoch": 0.49,
+       "grad_norm": 1.367322681350601,
+       "learning_rate": 5.313952597646568e-05,
+       "loss": 1.3589,
+       "step": 13
+     },
+     {
+       "epoch": 0.53,
+       "grad_norm": 1.2053182377155975,
+       "learning_rate": 4.6860474023534335e-05,
+       "loss": 1.3247,
+       "step": 14
+     },
+     {
+       "epoch": 0.56,
+       "grad_norm": 1.109279281261198,
+       "learning_rate": 4.063093427071376e-05,
+       "loss": 1.2959,
+       "step": 15
+     },
+     {
+       "epoch": 0.6,
+       "grad_norm": 1.3045532630263748,
+       "learning_rate": 3.4549150281252636e-05,
+       "loss": 1.3459,
+       "step": 16
+     },
+     {
+       "epoch": 0.64,
+       "grad_norm": 1.197638462603245,
+       "learning_rate": 2.8711035421746367e-05,
+       "loss": 1.4053,
+       "step": 17
+     },
+     {
+       "epoch": 0.68,
+       "grad_norm": 1.2235614357026008,
+       "learning_rate": 2.3208660251050158e-05,
+       "loss": 1.301,
+       "step": 18
+     },
+     {
+       "epoch": 0.72,
+       "grad_norm": 1.4513710687985,
+       "learning_rate": 1.8128800512565513e-05,
+       "loss": 1.3291,
+       "step": 19
+     },
+     {
+       "epoch": 0.75,
+       "grad_norm": 1.0341249332438227,
+       "learning_rate": 1.3551568628929434e-05,
+       "loss": 1.394,
+       "step": 20
+     },
+     {
+       "epoch": 0.79,
+       "grad_norm": 1.251919439255797,
+       "learning_rate": 9.549150281252633e-06,
+       "loss": 1.3943,
+       "step": 21
+     },
+     {
+       "epoch": 0.83,
+       "grad_norm": 1.1326570760452561,
+       "learning_rate": 6.184665997806832e-06,
+       "loss": 1.311,
+       "step": 22
+     },
+     {
+       "epoch": 0.87,
+       "grad_norm": 1.163110395778291,
+       "learning_rate": 3.511175705587433e-06,
+       "loss": 1.4033,
+       "step": 23
+     },
+     {
+       "epoch": 0.9,
+       "grad_norm": 1.1416927151375318,
+       "learning_rate": 1.5708419435684462e-06,
+       "loss": 1.271,
+       "step": 24
+     },
+     {
+       "epoch": 0.94,
+       "grad_norm": 1.0311072964787094,
+       "learning_rate": 3.9426493427611177e-07,
+       "loss": 1.2318,
+       "step": 25
+     },
+     {
+       "epoch": 0.98,
+       "grad_norm": 0.9861863614033847,
+       "learning_rate": 0.0,
+       "loss": 1.2832,
+       "step": 26
+     },
+     {
+       "epoch": 0.98,
+       "step": 26,
+       "total_flos": 1.1771536628462387e+17,
+       "train_loss": 1.3973060021033654,
+       "train_runtime": 832.4023,
+       "train_samples_per_second": 4.085,
+       "train_steps_per_second": 0.031
+     }
+   ],
+   "logging_steps": 1.0,
+   "max_steps": 26,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 1,
+   "save_steps": 50,
+   "total_flos": 1.1771536628462387e+17,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
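
The log shows a textbook cosine schedule: the learning rate starts at 1e-4 and decays to zero over the 26 steps as lr(s) = 1e-4 · 0.5 · (1 + cos(π(s−1)/25)) (e.g. step 6 gives 1e-4 · 0.5 · (1 + cos(π·5/25)) = 9.045084971874738e-05, matching the logged value), while loss drifts from ~1.59 down to ~1.28 with a mean of 1.3973. A short sketch that re-derives the schedule and pulls the loss curve out of this file; it assumes trainer_state.json sits in the working directory:

```python
# Check the cosine LR schedule and dump the loss curve from trainer_state.json.
import json
import math

with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "learning_rate" not in entry:
        continue  # skip the final summary record
    s = entry["step"]
    expected = 1e-4 * 0.5 * (1 + math.cos(math.pi * (s - 1) / 25))
    assert abs(entry["learning_rate"] - expected) < 1e-9
    print(f"step {s:2d}  lr {entry['learning_rate']:.3e}  loss {entry['loss']:.4f}")

print("mean train loss:", state["log_history"][-1]["train_loss"])  # 1.3973...
```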