maywell committed on
Commit
df28c17
·
verified ·
1 Parent(s): d72ae82

Update README.md

Files changed (1)
  1. README.md +100 -3
README.md CHANGED
@@ -1,3 +1,100 @@
- ---
- license: apache-2.0
- ---
+ ---
+ license: apache-2.0
+ ---
+ # maywell/EXAONE-3.0-7.8B-Instruct-Llamafied
+
+ Because LG has blocked even redistribution under the same license, the converted model itself can no longer be shared here.
+ If you need a Llamafied model for vLLM, inference, or other uses, please run the script below to convert it yourself.
+
+ ```python
+ import torch
+ from transformers import LlamaConfig, LlamaForCausalLM
+ # modeling_exaone.py comes with the original EXAONE repo; keep a copy of it next to this script
+ from modeling_exaone import ExaoneForCausalLM
+ import gc
+
+ def load_model(model_path, model_class, torch_dtype=torch.bfloat16):
+     """Load and return a model given its path and class."""
+     return model_class.from_pretrained(model_path, torch_dtype=torch_dtype, device_map="auto")
+
+ def unload_model(model):
+     """Clear memory by deleting a model and calling the garbage collector."""
+     del model
+     gc.collect()
+     torch.cuda.empty_cache()
+
+ def create_llama_config(exaone_config):
+     """Create and return a LLaMA configuration based on EXAONE config."""
+     return LlamaConfig(
+         vocab_size=exaone_config.vocab_size,
+         hidden_size=exaone_config.hidden_size,
+         intermediate_size=exaone_config.intermediate_size,
+         num_hidden_layers=exaone_config.num_layers,
+         num_attention_heads=exaone_config.num_attention_heads,
+         max_position_embeddings=exaone_config.max_position_embeddings,
+         rms_norm_eps=exaone_config.layer_norm_epsilon,
+         num_key_value_heads=exaone_config.num_key_value_heads,
+         rope_theta=exaone_config.rope_theta,
+         attention_bias=False,
+     )
+
+ def copy_embedding_weights(llama_model, exaone_model):
+     """Copy embedding weights from EXAONE to LLaMA model."""
+     llama_model.model.embed_tokens.weight.data = exaone_model.transformer.wte.weight.data.to(llama_model.device)
+
+ def copy_layer_weights(llama_layer, exaone_layer, device):
+     """Copy weights for a single layer from EXAONE to LLaMA model."""
+     # Self-attention
+     llama_layer.self_attn.q_proj.weight.data = exaone_layer.attn.attention.q_proj.weight.data.to(device)
+     llama_layer.self_attn.k_proj.weight.data = exaone_layer.attn.attention.k_proj.weight.data.to(device)
+     llama_layer.self_attn.v_proj.weight.data = exaone_layer.attn.attention.v_proj.weight.data.to(device)
+     llama_layer.self_attn.o_proj.weight.data = exaone_layer.attn.attention.out_proj.weight.data.to(device)
+     # MLP
+     llama_layer.mlp.gate_proj.weight.data = exaone_layer.mlp.c_fc_0.weight.data.to(device)
+     llama_layer.mlp.up_proj.weight.data = exaone_layer.mlp.c_fc_1.weight.data.to(device)
+     llama_layer.mlp.down_proj.weight.data = exaone_layer.mlp.c_proj.weight.data.to(device)
+     # Layer Norms
+     llama_layer.input_layernorm.weight.data = exaone_layer.ln_1.weight.data.to(device)
+     llama_layer.post_attention_layernorm.weight.data = exaone_layer.ln_2.weight.data.to(device)
+
+ def copy_final_weights(llama_model, exaone_model):
+     """Copy final layer norm and LM head weights from EXAONE to LLaMA model."""
+     llama_model.model.norm.weight.data = exaone_model.transformer.ln_f.weight.data.to(llama_model.device)
+     llama_model.lm_head.weight.data = exaone_model.lm_head.weight.data.to(llama_model.device)
+
+ def port_exaone_to_llama(exaone_model_path, llama_model_path):
+     print("Loading EXAONE model...")
+     exaone_model = load_model(exaone_model_path, ExaoneForCausalLM)
+     exaone_config = exaone_model.config
+
+     print("Creating LLaMA configuration...")
+     llama_config = create_llama_config(exaone_config)
+
+     print("Initializing LLaMA model...")
+     llama_model = LlamaForCausalLM(llama_config)
+     llama_model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
+
+     print("Copying weights...")
+     copy_embedding_weights(llama_model, exaone_model)
+
+     for i in range(exaone_config.num_layers):
+         print(f"Copying weights for layer {i+1}/{exaone_config.num_layers}")
+         copy_layer_weights(llama_model.model.layers[i], exaone_model.transformer.h[i], llama_model.device)
+
+     copy_final_weights(llama_model, exaone_model)
+
+     print("Unloading EXAONE model to free memory...")
+     unload_model(exaone_model)
+
+     print(f"Saving ported LLaMA model to {llama_model_path}")
+     llama_model.save_pretrained(llama_model_path, safe_serialization=True, max_shard_size="5GB")
+
+     print("Unloading LLaMA model...")
+     unload_model(llama_model)
+
+     print(f"EXAONE model successfully ported to LLaMA format and saved at {llama_model_path}")
+
+ if __name__ == "__main__":
+     exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
+     llama_model_path = "./exa_llamafied"
+     port_exaone_to_llama(exaone_model_path, llama_model_path)
+ ```
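+
+ The script saves only the model weights; it does not copy a tokenizer into `./exa_llamafied`. As a minimal sketch, the converted checkpoint could then be loaded with plain `transformers`, reusing the tokenizer from the original `LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct` repo and assuming its chat template is available:
+
+ ```python
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # The conversion script does not save a tokenizer, so reuse the original EXAONE one.
+ tokenizer = AutoTokenizer.from_pretrained("LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct")
+
+ # "./exa_llamafied" is the directory written by port_exaone_to_llama above.
+ model = AutoModelForCausalLM.from_pretrained(
+     "./exa_llamafied",
+     torch_dtype=torch.bfloat16,
+     device_map="auto",
+ )
+
+ messages = [{"role": "user", "content": "Hello! Who are you?"}]
+ inputs = tokenizer.apply_chat_template(
+     messages, add_generation_prompt=True, return_tensors="pt"
+ ).to(model.device)
+ outputs = model.generate(inputs, max_new_tokens=128)
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ ```
+
+ For vLLM, the same directory should work as the model path; since no tokenizer files are written there, point vLLM's `tokenizer` argument at the original EXAONE repo.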