Uploaded model

  • Developed by: YukiIso
  • License: apache-2.0
  • Finetuned from model: llm-jp/llm-jp-3-13b

Code

# Install the required libraries
# Assumes a Python 3.10.12 environment
!pip install -U pip
!pip install -U transformers
!pip install -U bitsandbytes
!pip install -U accelerate
!pip install -U datasets
!pip install -U peft
!pip install -U trl
!pip install -U wandb
!pip install ipywidgets --upgrade
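# Optional sanity check after installation (a sketch, not part of the
# original notebook): confirm that the key libraries import and report
# their versions.
# import transformers, torch
# print(transformers.__version__, torch.__version__)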

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
from peft import PeftModel
import torch
from tqdm import tqdm
import json

# Set the token obtained from Hugging Face
# Create a new token under [Settings > Access Tokens] on the Hugging Face Hub.
from google.colab import userdata
HF_TOKEN = userdata.get('HF_TOKEN')
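
# Note: google.colab.userdata is Colab-specific. Outside Colab, one
# alternative (an assumption, not part of the original notebook) is to
# read the token from an environment variable instead:
# import os
# HF_TOKEN = os.environ.get("HF_TOKEN")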

# Specify the model ID and the adapter ID
model_id = "llm-jp/llm-jp-3-13b"
adapter_id = "totsukash/llm-jp-3-13b-finetune"

# QLoRA configuration
# Quantize to 4 bits for efficient memory use
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
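
# Optional: BitsAndBytesConfig also accepts bnb_4bit_use_double_quant=True,
# which additionally quantizes the quantization constants for a further
# small memory saving. Not used in the original run; shown as a sketch:
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16,
#     bnb_4bit_use_double_quant=True,
# )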

# Load the model
# device_map="auto" places layers on the available GPUs/CPU automatically
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    token=HF_TOKEN
)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, token=HF_TOKEN)

# Attach the LoRA adapter
model = PeftModel.from_pretrained(model, adapter_id, token=HF_TOKEN)
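
# Putting the model in eval mode disables dropout, making the greedy
# decoding below deterministic (a small addition; the original notebook
# relies on the default mode):
model.eval()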

# Load the dataset
# The evaluation data must be stored in JSONL format (one JSON object per line).
datasets = []
with open("/content/elyza-tasks-100-TV_0.jsonl", "r") as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):  # detect the end of a JSON object
            datasets.append(json.loads(item))
            item = ""
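
# If the file is strict JSONL (exactly one object per line), a simpler
# equivalent read is possible (an assumption about the file layout):
# with open("/content/elyza-tasks-100-TV_0.jsonl", "r") as f:
#     datasets = [json.loads(line) for line in f if line.strip()]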

# Inference
# Pass each task's input to the model and collect the generated output.
results = []
for data in tqdm(datasets):
    task_input = data["input"]  # avoid shadowing the built-in input()
    # Dedented so the prompt carries no stray leading whitespace
    prompt = f"""### 指示
{task_input}
### 回答
"""

    # Tokenize the prompt and generate greedily
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_new_tokens=512,
            do_sample=False,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad warning
        )
    output = tokenizer.decode(outputs[0][input_ids.size(1):], skip_special_tokens=True)

    results.append({"task_id": data["task_id"], "input": task_input, "output": output})
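
# Quick spot check of the first generation (assumes at least one task was
# parsed from the dataset file):
# print(results[0]["output"])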


# Save the inference results in JSONL format
# The output file name is derived from the adapter ID
import re
jsonl_id = re.sub(".*/", "", adapter_id)
with open(f"./{jsonl_id}-outputs.jsonl", 'w', encoding='utf-8') as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)  # keep Japanese text intact
        f.write('\n')
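
# A quick sanity check (not part of the original notebook): read the file
# back and confirm each line parses and carries the expected keys.
# with open(f"./{jsonl_id}-outputs.jsonl", encoding="utf-8") as f:
#     for line in f:
#         record = json.loads(line)
#         assert {"task_id", "input", "output"} <= record.keys()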