---
license: apache-2.0
---

# maywell/EXAONE-3.0-7.8B-Instruct-Llamafied

Since LG has blocked even redistribution under the same license, the model itself can no longer be shared here. If you need a Llamafied version for vLLM, inference, or other uses, please run the script below to convert it yourself.

For the modeling_exaone and configuration_exaone modules used below, please refer to the original repository.
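
If helpful, the two files can be fetched straight from the Hub with huggingface_hub. This is only a sketch; it assumes the files are published as modeling_exaone.py and configuration_exaone.py in the original LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct repository.

```python
# Sketch: download the EXAONE remote-code files next to the conversion script
# so that `from modeling_exaone import ExaoneForCausalLM` resolves.
# Assumes the file names used by the original repository.
from huggingface_hub import hf_hub_download

for filename in ("modeling_exaone.py", "configuration_exaone.py"):
    hf_hub_download(
        repo_id="LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
        filename=filename,
        local_dir=".",
    )
```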

```python
import torch
from transformers import LlamaConfig, LlamaForCausalLM
from modeling_exaone import ExaoneForCausalLM
import gc

def load_model(model_path, model_class, torch_dtype=torch.bfloat16):
    """Load and return a model given its path and class."""
    return model_class.from_pretrained(model_path, torch_dtype=torch_dtype, device_map="auto")

def unload_model(model):
    """Clear memory by deleting a model and calling the garbage collector."""
    del model
    gc.collect()
    torch.cuda.empty_cache()

def create_llama_config(exaone_config):
    """Create and return a LLaMA configuration based on EXAONE config."""
    return LlamaConfig(
        vocab_size=exaone_config.vocab_size,
        hidden_size=exaone_config.hidden_size,
        intermediate_size=exaone_config.intermediate_size,
        num_hidden_layers=exaone_config.num_layers,
        num_attention_heads=exaone_config.num_attention_heads,
        max_position_embeddings=exaone_config.max_position_embeddings,
        rms_norm_eps=exaone_config.layer_norm_epsilon,
        num_key_value_heads=exaone_config.num_key_value_heads,
        rope_theta=exaone_config.rope_theta,
        attention_bias=False,
    )

def copy_embedding_weights(llama_model, exaone_model):
    """Copy embedding weights from EXAONE to LLaMA model."""
    llama_model.model.embed_tokens.weight.data = exaone_model.transformer.wte.weight.data

def copy_layer_weights(llama_layer, exaone_layer):
    """Copy weights for a single layer from EXAONE to LLaMA model."""
    # Self-attention
    llama_layer.self_attn.q_proj.weight.data = exaone_layer.attn.attention.q_proj.weight.data
    llama_layer.self_attn.k_proj.weight.data = exaone_layer.attn.attention.k_proj.weight.data
    llama_layer.self_attn.v_proj.weight.data = exaone_layer.attn.attention.v_proj.weight.data
    llama_layer.self_attn.o_proj.weight.data = exaone_layer.attn.attention.out_proj.weight.data
    # MLP
    llama_layer.mlp.gate_proj.weight.data = exaone_layer.mlp.c_fc_0.weight.data
    llama_layer.mlp.up_proj.weight.data = exaone_layer.mlp.c_fc_1.weight.data
    llama_layer.mlp.down_proj.weight.data = exaone_layer.mlp.c_proj.weight.data
    # Layer Norms
    llama_layer.input_layernorm.weight.data = exaone_layer.ln_1.weight.data
    llama_layer.post_attention_layernorm.weight.data = exaone_layer.ln_2.weight.data

def copy_final_weights(llama_model, exaone_model):
    """Copy final layer norm and LM head weights from EXAONE to LLaMA model."""
    llama_model.model.norm.weight.data = exaone_model.transformer.ln_f.weight.data
    llama_model.lm_head.weight.data = exaone_model.lm_head.weight.data

def port_exaone_to_llama(exaone_model_path, llama_model_path):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print("Loading EXAONE model...")
    exaone_model = load_model(exaone_model_path, ExaoneForCausalLM).to(device)
    exaone_config = exaone_model.config

    print("Creating LLaMA configuration...")
    llama_config = create_llama_config(exaone_config)

    print("Initializing LLaMA model...")
    llama_model = LlamaForCausalLM(llama_config).to(device)

    print("Copying weights...")
    copy_embedding_weights(llama_model, exaone_model)

    for i in range(exaone_config.num_layers):
        print(f"Copying weights for layer {i+1}/{exaone_config.num_layers}")
        copy_layer_weights(llama_model.model.layers[i], exaone_model.transformer.h[i])

    copy_final_weights(llama_model, exaone_model)

    print("Unloading EXAONE model to free memory...")
    unload_model(exaone_model)

    print(f"Saving ported LLaMA model to {llama_model_path}")
    llama_model.save_pretrained(llama_model_path, safe_serialization=True, max_shard_size="5GB")

    print("Unloading LLaMA model...")
    unload_model(llama_model)

    print(f"EXAONE model successfully ported to LLaMA format and saved at {llama_model_path}")

if __name__ == "__main__":
    exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
    llama_model_path = "./exa_llamafied"
    port_exaone_to_llama(exaone_model_path, llama_model_path)
```
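
The converted folder contains only the model weights and config; the script above does not save a tokenizer. Below is a minimal usage sketch, assuming the tokenizer can still be loaded from the original repository.

```python
# Usage sketch (assumption: the original EXAONE tokenizer is still downloadable).
import torch
from transformers import AutoTokenizer, LlamaForCausalLM

tokenizer = AutoTokenizer.from_pretrained("LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct")
model = LlamaForCausalLM.from_pretrained(
    "./exa_llamafied", torch_dtype=torch.bfloat16, device_map="auto"
)

inputs = tokenizer("Hello, EXAONE!", return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```

vLLM should likewise be able to serve ./exa_llamafied directly, with its tokenizer argument pointed at the original repository (not verified here).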

Many thanks to LG AI Research for releasing the model. Original Repository