Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,100 @@
---
license: apache-2.0
---

# maywell/EXAONE-3.0-7.8B-Instruct-Llamafied

Because LG's license forbids redistribution even under the same license, the converted model weights cannot be shared here.

If you need a Llamafied version of the model for vLLM, inference, or other uses, please run the script below to convert it yourself.

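Note: the script imports `ExaoneForCausalLM` from `modeling_exaone`, so the `modeling_exaone.py` file from the original [LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct) repository needs to be in your working directory.
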
```python
import torch
from transformers import LlamaConfig, LlamaForCausalLM
from modeling_exaone import ExaoneForCausalLM
import gc

def load_model(model_path, model_class, torch_dtype=torch.bfloat16):
    """Load and return a model given its path and class."""
    return model_class.from_pretrained(model_path, torch_dtype=torch_dtype, device_map="auto")

def unload_model(model):
    """Clear memory by deleting a model and calling the garbage collector."""
    del model
    gc.collect()
    torch.cuda.empty_cache()

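# The LLaMA config is assembled by renaming EXAONE config fields
# (num_layers -> num_hidden_layers, layer_norm_epsilon -> rms_norm_eps).
# attention_bias=False is consistent with the copy helpers below, which
# transfer .weight tensors only and no bias terms.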
def create_llama_config(exaone_config):
    """Create and return a LLaMA configuration based on the EXAONE config."""
    return LlamaConfig(
        vocab_size=exaone_config.vocab_size,
        hidden_size=exaone_config.hidden_size,
        intermediate_size=exaone_config.intermediate_size,
        num_hidden_layers=exaone_config.num_layers,
        num_attention_heads=exaone_config.num_attention_heads,
        max_position_embeddings=exaone_config.max_position_embeddings,
        rms_norm_eps=exaone_config.layer_norm_epsilon,
        num_key_value_heads=exaone_config.num_key_value_heads,
        rope_theta=exaone_config.rope_theta,
        attention_bias=False,
    )

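# The copy helpers below re-map module paths only; tensor shapes and values
# are unchanged. The least obvious mapping is the MLP: EXAONE's
# c_fc_0 / c_fc_1 / c_proj correspond to LLaMA's gate_proj / up_proj / down_proj.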
def copy_embedding_weights(llama_model, exaone_model):
    """Copy embedding weights from EXAONE to LLaMA model."""
    llama_model.model.embed_tokens.weight.data = exaone_model.transformer.wte.weight.data.to(llama_model.device)

def copy_layer_weights(llama_layer, exaone_layer, device):
    """Copy weights for a single layer from EXAONE to LLaMA model."""
    # Self-attention
    llama_layer.self_attn.q_proj.weight.data = exaone_layer.attn.attention.q_proj.weight.data.to(device)
    llama_layer.self_attn.k_proj.weight.data = exaone_layer.attn.attention.k_proj.weight.data.to(device)
    llama_layer.self_attn.v_proj.weight.data = exaone_layer.attn.attention.v_proj.weight.data.to(device)
    llama_layer.self_attn.o_proj.weight.data = exaone_layer.attn.attention.out_proj.weight.data.to(device)
    # MLP
    llama_layer.mlp.gate_proj.weight.data = exaone_layer.mlp.c_fc_0.weight.data.to(device)
    llama_layer.mlp.up_proj.weight.data = exaone_layer.mlp.c_fc_1.weight.data.to(device)
    llama_layer.mlp.down_proj.weight.data = exaone_layer.mlp.c_proj.weight.data.to(device)
    # Layer norms
    llama_layer.input_layernorm.weight.data = exaone_layer.ln_1.weight.data.to(device)
    llama_layer.post_attention_layernorm.weight.data = exaone_layer.ln_2.weight.data.to(device)

def copy_final_weights(llama_model, exaone_model):
    """Copy final layer norm and LM head weights from EXAONE to LLaMA model."""
    llama_model.model.norm.weight.data = exaone_model.transformer.ln_f.weight.data.to(llama_model.device)
    llama_model.lm_head.weight.data = exaone_model.lm_head.weight.data.to(llama_model.device)

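# Note: both models are resident while weights are copied. Expect a peak of
# roughly the float32 LLaMA init (~31 GB for 7.8B params) plus the bfloat16
# EXAONE weights (~16 GB) until the copied tensors replace the float32 ones.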
def port_exaone_to_llama(exaone_model_path, llama_model_path):
    print("Loading EXAONE model...")
    exaone_model = load_model(exaone_model_path, ExaoneForCausalLM)
    exaone_config = exaone_model.config

    print("Creating LLaMA configuration...")
    llama_config = create_llama_config(exaone_config)

    print("Initializing LLaMA model...")
    llama_model = LlamaForCausalLM(llama_config)
    llama_model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    print("Copying weights...")
    copy_embedding_weights(llama_model, exaone_model)

    for i in range(exaone_config.num_layers):
        print(f"Copying weights for layer {i+1}/{exaone_config.num_layers}")
        copy_layer_weights(llama_model.model.layers[i], exaone_model.transformer.h[i], llama_model.device)

    copy_final_weights(llama_model, exaone_model)

    print("Unloading EXAONE model to free memory...")
    unload_model(exaone_model)

    print(f"Saving ported LLaMA model to {llama_model_path}")
    llama_model.save_pretrained(llama_model_path, safe_serialization=True, max_shard_size="5GB")

    print("Unloading LLaMA model...")
    unload_model(llama_model)

    print(f"EXAONE model successfully ported to LLaMA format and saved at {llama_model_path}")

if __name__ == "__main__":
    exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
    llama_model_path = "./exa_llamafied"
    port_exaone_to_llama(exaone_model_path, llama_model_path)
```
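
Once the conversion has finished, `./exa_llamafied` holds a standard LLaMA-format checkpoint. Below is a minimal usage sketch, not part of the original script; it assumes the EXAONE tokenizer can be reused unchanged with the converted weights, since the script above only ports model weights:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
llama_model_path = "./exa_llamafied"

# Assumption: the original EXAONE tokenizer works as-is with the Llamafied model.
tokenizer = AutoTokenizer.from_pretrained(exaone_model_path, trust_remote_code=True)
tokenizer.save_pretrained(llama_model_path)

# Load the converted checkpoint as a plain LLaMA-architecture model.
model = AutoModelForCausalLM.from_pretrained(llama_model_path, torch_dtype="auto", device_map="auto")

inputs = tokenizer("Hello, how are you?", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

Because the result is a plain LLaMA checkpoint, vLLM should be able to serve the directory directly, e.g. `LLM(model="./exa_llamafied")`.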